Diffstat (limited to 'comm/third_party/libgcrypt/cipher')
-rw-r--r--  comm/third_party/libgcrypt/cipher/ChangeLog-2011  4279
-rw-r--r--  comm/third_party/libgcrypt/cipher/Makefile.am  258
-rw-r--r--  comm/third_party/libgcrypt/cipher/Makefile.in  1445
-rw-r--r--  comm/third_party/libgcrypt/cipher/arcfour-amd64.S  108
-rw-r--r--  comm/third_party/libgcrypt/cipher/arcfour.c  216
-rw-r--r--  comm/third_party/libgcrypt/cipher/asm-common-aarch64.h  104
-rw-r--r--  comm/third_party/libgcrypt/cipher/asm-common-amd64.h  189
-rw-r--r--  comm/third_party/libgcrypt/cipher/asm-common-s390x.h  90
-rw-r--r--  comm/third_party/libgcrypt/cipher/asm-inline-s390x.h  157
-rw-r--r--  comm/third_party/libgcrypt/cipher/asm-poly1305-aarch64.h  245
-rw-r--r--  comm/third_party/libgcrypt/cipher/asm-poly1305-amd64.h  171
-rw-r--r--  comm/third_party/libgcrypt/cipher/asm-poly1305-s390x.h  140
-rw-r--r--  comm/third_party/libgcrypt/cipher/bithelp.h  123
-rw-r--r--  comm/third_party/libgcrypt/cipher/blake2.c  996
-rw-r--r--  comm/third_party/libgcrypt/cipher/blake2b-amd64-avx2.S  300
-rw-r--r--  comm/third_party/libgcrypt/cipher/blake2s-amd64-avx.S  278
-rw-r--r--  comm/third_party/libgcrypt/cipher/blowfish-amd64.S  601
-rw-r--r--  comm/third_party/libgcrypt/cipher/blowfish-arm.S  743
-rw-r--r--  comm/third_party/libgcrypt/cipher/blowfish.c  1142
-rw-r--r--  comm/third_party/libgcrypt/cipher/bufhelp.h  385
-rw-r--r--  comm/third_party/libgcrypt/cipher/camellia-aarch64.S  586
-rw-r--r--  comm/third_party/libgcrypt/cipher/camellia-aesni-avx-amd64.S  2618
-rw-r--r--  comm/third_party/libgcrypt/cipher/camellia-aesni-avx2-amd64.S  1782
-rw-r--r--  comm/third_party/libgcrypt/cipher/camellia-arm.S  626
-rw-r--r--  comm/third_party/libgcrypt/cipher/camellia-glue.c  1097
-rw-r--r--  comm/third_party/libgcrypt/cipher/camellia.c  1413
-rw-r--r--  comm/third_party/libgcrypt/cipher/camellia.h  95
-rw-r--r--  comm/third_party/libgcrypt/cipher/cast5-amd64.S  663
-rw-r--r--  comm/third_party/libgcrypt/cipher/cast5-arm.S  728
-rw-r--r--  comm/third_party/libgcrypt/cipher/cast5.c  1238
-rw-r--r--  comm/third_party/libgcrypt/cipher/chacha20-aarch64.S  648
-rw-r--r--  comm/third_party/libgcrypt/cipher/chacha20-amd64-avx2.S  601
-rw-r--r--  comm/third_party/libgcrypt/cipher/chacha20-amd64-ssse3.S  1012
-rw-r--r--  comm/third_party/libgcrypt/cipher/chacha20-armv7-neon.S  393
-rw-r--r--  comm/third_party/libgcrypt/cipher/chacha20-ppc.c  646
-rw-r--r--  comm/third_party/libgcrypt/cipher/chacha20-s390x.S  1561
-rw-r--r--  comm/third_party/libgcrypt/cipher/chacha20.c  1306
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-aeswrap.c  209
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-cbc.c  292
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-ccm.c  415
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-cfb.c  317
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-cmac.c  292
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-ctr.c  120
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-eax.c  289
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-gcm-armv7-neon.S  341
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch32-ce.S  433
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch64-ce.S  424
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-gcm-intel-pclmul.c  712
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-gcm.c  1207
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-internal.h  809
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-ocb.c  761
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-ofb.c  108
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-poly1305.c  375
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-selftest.c  512
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-selftest.h  69
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher-xts.c  189
-rw-r--r--  comm/third_party/libgcrypt/cipher/cipher.c  1767
-rw-r--r--  comm/third_party/libgcrypt/cipher/crc-armv8-aarch64-ce.S  497
-rw-r--r--  comm/third_party/libgcrypt/cipher/crc-armv8-ce.c  229
-rw-r--r--  comm/third_party/libgcrypt/cipher/crc-intel-pclmul.c  939
-rw-r--r--  comm/third_party/libgcrypt/cipher/crc-ppc.c  656
-rw-r--r--  comm/third_party/libgcrypt/cipher/crc.c  955
-rw-r--r--  comm/third_party/libgcrypt/cipher/des-amd64.S  1111
-rw-r--r--  comm/third_party/libgcrypt/cipher/des.c  1507
-rw-r--r--  comm/third_party/libgcrypt/cipher/dsa-common.c  418
-rw-r--r--  comm/third_party/libgcrypt/cipher/dsa.c  1394
-rw-r--r--  comm/third_party/libgcrypt/cipher/ecc-common.h  140
-rw-r--r--  comm/third_party/libgcrypt/cipher/ecc-curves.c  1603
-rw-r--r--  comm/third_party/libgcrypt/cipher/ecc-ecdh.c  127
-rw-r--r--  comm/third_party/libgcrypt/cipher/ecc-ecdsa.c  248
-rw-r--r--  comm/third_party/libgcrypt/cipher/ecc-eddsa.c  1182
-rw-r--r--  comm/third_party/libgcrypt/cipher/ecc-gost.c  218
-rw-r--r--  comm/third_party/libgcrypt/cipher/ecc-misc.c  438
-rw-r--r--  comm/third_party/libgcrypt/cipher/ecc-sm2.c  569
-rw-r--r--  comm/third_party/libgcrypt/cipher/ecc.c  1779
-rw-r--r--  comm/third_party/libgcrypt/cipher/elgamal.c  1149
-rw-r--r--  comm/third_party/libgcrypt/cipher/gost-s-box.c  266
-rw-r--r--  comm/third_party/libgcrypt/cipher/gost.h  34
-rw-r--r--  comm/third_party/libgcrypt/cipher/gost28147.c  553
-rw-r--r--  comm/third_party/libgcrypt/cipher/gostr3411-94.c  383
-rw-r--r--  comm/third_party/libgcrypt/cipher/hash-common.c  193
-rw-r--r--  comm/third_party/libgcrypt/cipher/hash-common.h  62
-rw-r--r--  comm/third_party/libgcrypt/cipher/idea.c  382
-rw-r--r--  comm/third_party/libgcrypt/cipher/kdf-internal.h  40
-rw-r--r--  comm/third_party/libgcrypt/cipher/kdf.c  503
-rw-r--r--  comm/third_party/libgcrypt/cipher/keccak-armv7-neon.S  945
-rw-r--r--  comm/third_party/libgcrypt/cipher/keccak.c  1577
-rw-r--r--  comm/third_party/libgcrypt/cipher/keccak_permute_32.h  536
-rw-r--r--  comm/third_party/libgcrypt/cipher/keccak_permute_64.h  385
-rw-r--r--  comm/third_party/libgcrypt/cipher/mac-cmac.c  524
-rw-r--r--  comm/third_party/libgcrypt/cipher/mac-gmac.c  187
-rw-r--r--  comm/third_party/libgcrypt/cipher/mac-hmac.c  1495
-rw-r--r--  comm/third_party/libgcrypt/cipher/mac-internal.h  275
-rw-r--r--  comm/third_party/libgcrypt/cipher/mac-poly1305.c  364
-rw-r--r--  comm/third_party/libgcrypt/cipher/mac.c  808
-rw-r--r--  comm/third_party/libgcrypt/cipher/md.c  1639
-rw-r--r--  comm/third_party/libgcrypt/cipher/md4.c  296
-rw-r--r--  comm/third_party/libgcrypt/cipher/md5.c  322
-rw-r--r--  comm/third_party/libgcrypt/cipher/poly1305-internal.h  64
-rw-r--r--  comm/third_party/libgcrypt/cipher/poly1305-s390x.S  87
-rw-r--r--  comm/third_party/libgcrypt/cipher/poly1305.c  740
-rw-r--r--  comm/third_party/libgcrypt/cipher/primegen.c  1878
-rw-r--r--  comm/third_party/libgcrypt/cipher/pubkey-internal.h  105
-rw-r--r--  comm/third_party/libgcrypt/cipher/pubkey-util.c  1160
-rw-r--r--  comm/third_party/libgcrypt/cipher/pubkey.c  970
-rw-r--r--  comm/third_party/libgcrypt/cipher/rfc2268.c  378
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-aarch64.S  514
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-aesni.c  3965
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-amd64.S  477
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-arm.S  581
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-armv8-aarch32-ce.S  1867
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-armv8-aarch64-ce.S  1613
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-armv8-ce.c  414
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-internal.h  194
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-padlock.c  110
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-ppc-common.h  342
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-ppc-functions.h  2020
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-ppc.c  259
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-ppc9le.c  102
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-s390x.c  1155
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-ssse3-amd64-asm.S  874
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-ssse3-amd64.c  743
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael-tables.h  227
-rw-r--r--  comm/third_party/libgcrypt/cipher/rijndael.c  2032
-rw-r--r--  comm/third_party/libgcrypt/cipher/rmd160.c  529
-rw-r--r--  comm/third_party/libgcrypt/cipher/rsa-common.c  1038
-rw-r--r--  comm/third_party/libgcrypt/cipher/rsa.c  2035
-rw-r--r--  comm/third_party/libgcrypt/cipher/salsa20-amd64.S  940
-rw-r--r--  comm/third_party/libgcrypt/cipher/salsa20-armv7-neon.S  899
-rw-r--r--  comm/third_party/libgcrypt/cipher/salsa20.c  600
-rw-r--r--  comm/third_party/libgcrypt/cipher/scrypt.c  322
-rw-r--r--  comm/third_party/libgcrypt/cipher/seed.c  478
-rw-r--r--  comm/third_party/libgcrypt/cipher/serpent-armv7-neon.S  1124
-rw-r--r--  comm/third_party/libgcrypt/cipher/serpent-avx2-amd64.S  1160
-rw-r--r--  comm/third_party/libgcrypt/cipher/serpent-sse2-amd64.S  1211
-rw-r--r--  comm/third_party/libgcrypt/cipher/serpent.c  1807
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha1-armv7-neon.S  526
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha1-armv8-aarch32-ce.S  220
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha1-armv8-aarch64-ce.S  201
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha1-avx-amd64.S  429
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha1-avx-bmi2-amd64.S  441
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha1-avx2-bmi2-amd64.S  573
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha1-intel-shaext.c  292
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha1-ssse3-amd64.S  437
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha1.c  765
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha1.h  47
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha256-armv8-aarch32-ce.S  231
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha256-armv8-aarch64-ce.S  215
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha256-avx-amd64.S  506
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha256-avx2-bmi2-amd64.S  527
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha256-intel-shaext.c  363
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha256-ppc.c  795
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha256-ssse3-amd64.S  528
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha256.c  857
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha512-arm.S  464
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha512-armv7-neon.S  450
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha512-avx-amd64.S  461
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha512-avx2-bmi2-amd64.S  502
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha512-ppc.c  969
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha512-ssse3-amd64.S  467
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha512-ssse3-i386.c  404
-rw-r--r--  comm/third_party/libgcrypt/cipher/sha512.c  1316
-rw-r--r--  comm/third_party/libgcrypt/cipher/sm3.c  473
-rw-r--r--  comm/third_party/libgcrypt/cipher/sm4-aesni-avx-amd64.S  987
-rw-r--r--  comm/third_party/libgcrypt/cipher/sm4-aesni-avx2-amd64.S  851
-rw-r--r--  comm/third_party/libgcrypt/cipher/sm4.c  1251
-rw-r--r--  comm/third_party/libgcrypt/cipher/stribog.c  1362
-rw-r--r--  comm/third_party/libgcrypt/cipher/tiger.c  860
-rw-r--r--  comm/third_party/libgcrypt/cipher/twofish-aarch64.S  321
-rw-r--r--  comm/third_party/libgcrypt/cipher/twofish-amd64.S  1184
-rw-r--r--  comm/third_party/libgcrypt/cipher/twofish-arm.S  363
-rw-r--r--  comm/third_party/libgcrypt/cipher/twofish-avx2-amd64.S  1048
-rw-r--r--  comm/third_party/libgcrypt/cipher/twofish.c  1793
-rw-r--r--  comm/third_party/libgcrypt/cipher/whirlpool-sse2-amd64.S  348
-rw-r--r--  comm/third_party/libgcrypt/cipher/whirlpool.c  1535
175 files changed, 125928 insertions, 0 deletions
diff --git a/comm/third_party/libgcrypt/cipher/ChangeLog-2011 b/comm/third_party/libgcrypt/cipher/ChangeLog-2011
new file mode 100644
index 0000000000..1ce6bd1e68
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/ChangeLog-2011
@@ -0,0 +1,4279 @@
+2011-12-01 Werner Koch <wk@g10code.com>
+
+ NB: ChangeLog files are no longer manually maintained. Starting
+ on December 1st, 2011 we put change information only in the GIT
+ commit log, and generate a top-level ChangeLog file from logs at
+ "make dist". See doc/HACKING for details.
+
+2011-09-16 Werner Koch <wk@g10code.com>
+
+ * primegen.c (_gcry_primegen_init): New.
+
+2011-09-15 Werner Koch <wk@g10code.com>
+
+ * cipher-cbc.c, cipher-cfb.c, cipher-ofb.c, cipher-ctr.c: New.
+ * cipher-aeswrap.c: New.
+ * cipher-internal.h: New.
+ * cipher.c (cipher_context_alignment_t, struct gcry_cipher_handle)
+ (CTX_MAGIC_NORMAL, CTX_MAGIC_SECURE, NEED_16BYTE_ALIGNED_CONTEXT)
+ (MAX_BLOCKSIZE): Move to cipher-internal.h.
+ (do_aeswrap_encrypt, do_aeswrap_decrypt)
+ (do_cbc_encrypt, do_cbc_decrypt, do_ctr_encrypt, do_ctr_decrypt)
+ (do_ofb_encrypt, do_ofb_decrypt, do_ctr_encrypt): Move to the
+ respective new cipher-foo.c files.
+ (do_ctr_decrypt): Remove.
+
+2011-09-15 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (gcry_pk_list): Remove.
+ (gcry_pk_unregister): Remove.
+ * md.c (gcry_md_list): Remove.
+ (gcry_md_unregister): Remove.
+ * cipher.c (gcry_cipher_list): Remove.
+ (gcry_cipher_unregister): Remove.
+ * ac.c: Remove.
+
+2011-06-29 Werner Koch <wk@g10code.com>
+
+ * cipher.c (cipher_get_keylen): Return zero for an invalid algorithm.
+ (cipher_get_blocksize): Ditto.
+
+2011-06-13 Werner Koch <wk@g10code.com>
+
+ * dsa.c (selftest_sign_1024): Use the raw and not the pkcs1 flag.
+
+ * pubkey.c (gcry_pk_sign): Special case output generation for PKCS1.
+ (sexp_data_to_mpi): Parse "random-override" for pkcs1 encryption.
+ (pkcs1_encode_for_encryption): Add args RANDOM_OVERRIDE and
+ RANDOM_OVERRIDE_LEN.
+ (gcry_pk_encrypt): Special case output generation for PKCS1.
+ (sexp_data_to_mpi): Use GCRYMPI_FMT_USG for raw encoding.
+
+2011-06-10 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (gcry_pk_sign): Use format specifier '%M' to avoid
+ leading zeroes. Special case output generation for PSS.
+ (gcry_pk_encrypt): Special case output generation for OAEP.
+ (sexp_data_to_mpi): Use GCRYMPI_FMT_USG for PSS verify.
+
+2011-06-09 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (oaep_decode): Make use of octet_string_from_mpi.
+ (sexp_to_enc): Skip "random-override".
+
+ * pubkey.c (oaep_encode, pss_encode): Add args RANDOM_OVERRIDE and
+ RANDOM_OVERRIDE_LEN.
+ (sexp_data_to_mpi): Extract new random-override parameter.
+
+ * pubkey.c (pss_encode, pss_verify): Use VALUE verbatim for MHASH.
+ (octet_string_from_mpi): Add arg SPACE.
+
+2011-06-08 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (pss_encode, pss_verify): Restructure and comment code
+ to match rfc-3447. Replace secure allocs by plain allocs and
+ wipememory. Use gcry_md_hash_buffer.
+ (octet_string_from_mpi): New.
+
+2011-06-03 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (oaep_decode): Add more comments and restructure to
+ match the description in RFC-3447.
+ (oaep_encode): Check for mgf1 error. s/dlen/hlen/.
+
+2011-05-31 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (mgf1): Optimize by using gcry_md_reset. Re-implement
+ for easier readability.
+ (oaep_encode): Add more comments and restructure to match the
+ description in RFC-3447.
+
+ * pubkey.c (pkcs1_encode_for_signature, oaep_decode): Change
+ return value from one MPI to a buffer.
+ (gcry_pk_decrypt): Adjust for this change.
+
+2011-05-30 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (pkcs1_decode_for_encryption): Change handling of
+ leading zero byte.
+
+2011-05-27 Daiki Ueno <ueno@unixuser.org>
+
+ * pubkey.c (gcry_pk_decrypt): Fix double-free when un-padding
+ invalid data. Thanks to Tom Ritter.
+
+2011-05-24 Daiki Ueno <ueno@unixuser.org>
+
+ * rsa.c (rsa_verify): Use CMP if given, to check the decrypted
+ sig.
+
+ * pubkey.c (sexp_to_enc, sexp_data_to_mpi): Factor out
+ CTX initialization to ...
+ (init_encoding_ctx): .. new.
+ (gcry_pk_verify): Pass verify func and the arg to pubkey_verify.
+ (pss_encode, pss_verify, pss_verify_cmp): New.
+
+2011-05-23 Daiki Ueno <ueno@unixuser.org>
+
+ * pubkey.c (pkcs1_decode_for_encryption, oaep_decode): Fix memleak
+ when gcry_mpi_print fails.
+
+2011-05-18 Daiki Ueno <ueno@unixuser.org>
+
+ * pubkey.c (sexp_data_to_mpi): Factor some code out to ...
+ (pkcs1_encode_for_encryption): .. new,
+ (pkcs1_encode_for_signature): .. new.
+ (pkcs1_decode_for_encryption): New.
+ (gcry_pk_decrypt): Do un-padding for PKCS#1 as well as OAEP.
+ (sexp_to_enc): Abolish "unpad" flag, which is not necessary since
+ we can do un-padding implicitly when "pkcs1" or "oaep" is given.
+
+2011-05-11 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (sexp_to_enc, sexp_data_to_mpi): Set LABEL to NULL
+ after free.
+ (sexp_to_enc, sexp_data_to_mpi): Do not allow multiple encoding
+ flags.
+ (oaep_encode, oaep_decode, sexp_to_key, sexp_to_sig)
+ (sexp_to_enc, sexp_data_to_mpi, gcry_pk_encrypt, gcry_pk_sign)
+ (gcry_pk_genkey, _gcry_pk_get_elements): Replace access to ERRNO
+ by gpg_err_code_from_syserror.
+
+2011-05-11 Daiki Ueno <ueno@unixuser.org>
+
+ * pubkey.c (sexp_data_to_mpi): Factor some code out to ...
+ (get_hash_algo): .. new.
+ (mgf1, oaep_encode, oaep_decode): New.
+ (sexp_to_enc): Add arg CTX. Remove arg RET_WANT_PKCS1. Support
+ OAEP.
+ (sexp_data_to_mpi): Add arg CTX. Support OAEP.
+ (gcry_pk_encrypt): Pass a CTX to sexp_data_to_mpi.
+ (gcry_pk_decrypt): Pass a CTX to sexp_to_enc and replace
+ WANT_PKCS1. Implement unpadding for OAEP.
+ (gcry_pk_sign): Pass NULL for CTX arg of sexp_data_to_mpi.
+ (gcry_pk_verify): Ditto.
+
+2011-04-19 Werner Koch <wk@g10code.com>
+
+ * cipher.c (gcry_cipher_open): Replace gpg_err_code_from_errno by
+ gpg_err_code_from_syserror.
+
+2011-04-11 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (gcry_pk_get_keygrip): Avoid double free of L2.
+
+ * cipher.c (_gcry_cipher_setctr): Clear unused lastiv info.
+ (gcry_cipher_ctl) <GCRYCTL_SET_CTR>: Implement by calling
+ _gcry_cipher_setctr.
+ (do_ctr_encrypt): Save last counter and reuse it.
+
+ * cipher.c (do_ctr_encrypt): Allow arbitrary length inputs to
+ match the 1.4 behaviour.
+
+2011-04-04 Werner Koch <wk@g10code.com>
+
+ * ecc.c (compute_keygrip): Release L1 while parsing "curve".
+
+ * pubkey.c (gcry_pk_get_keygrip): Always release NAME and L2.
+ Reported by Ben Kibbey.
+
+2011-03-28 Werner Koch <wk@g10code.com>
+
+ * primegen.c (_gcry_generate_elg_prime): Make sure that PRIME is
+ NULL if the called func ever returns an error.
+
+ * pubkey.c (gcry_pk_decrypt): Remove unused var PUBKEY.
+
+2011-03-09 Werner Koch <wk@g10code.com>
+
+ * kdf.c: New.
+
+2011-02-22 Werner Koch <wk@g10code.com>
+
+ * rijndael.c (aesni_cleanup_2_4): New.
+ (aesenc_xmm1_xmm0, do_aesni_ctr_4): New.
+ (_gcry_aes_ctr_enc): New.
+ * cipher.c (struct gcry_cipher_handle): Add CTR_ENC. Move field
+ CTR into an u_ctr union and adjust all users.
+ (gcry_cipher_open): Use _gcry_aes_ctr_enc.
+ (do_ctr_encrypt): Use bulk mode.
+
+2011-02-18 Werner Koch <wk@g10code.com>
+
+ * rijndael.c (u32_a_t): New.
+ (do_encrypt_aligned, do_decrypt_aligned): Use the new type to
+ avoid problems with strict aliasing rules.
+
+2011-02-16 Werner Koch <wk@g10code.com>
+
+ * rijndael.c (do_aesni_cfb) [USE_AESNI]: New.
+ (_gcry_aes_cfb_enc, _gcry_aes_cfb_dec) [USE_AESNI]: Use new function.
+
+2011-02-15 Werner Koch <wk@g10code.com>
+
+ * rijndael.c (do_aesni_enc_aligned, do_aesni_dec_aligned): Use
+ movdqa for the key but keep using movdqu for the data.
+ (do_aesni): Remove alignment detection. Don't burn the stack.
+ (aesni_prepare, aesni_cleanup): New macros.
+ (rijndael_encrypt, _gcry_aes_cfb_enc, _gcry_aes_cbc_enc)
+ (rijndael_decrypt, _gcry_aes_cfb_dec, _gcry_aes_cbc_dec): Use
+ these macros. Don't burn the stack in the USE_AESNI case.
+ (do_setkey): Add disabled code to use aeskeygenassist.
+
+2011-02-14 Werner Koch <wk@g10code.com>
+
+ * rijndael.c (ATTR_ALIGNED_16): New
+ (do_aesni): Do not copy if already aligned.
+ (do_encrypt, do_decrypt): Ditto.
+ (rijndael_decrypt, rijndael_encrypt): Increase stack burning amount.
+
+ * rijndael.c (RIJNDAEL_context): Reorder fields. Change fieldname
+ ROUNDS to rounds. Move padlock_key into u1.
+ (keySched, keySched2): Rename macros to keyschenc and keyschdec
+ and change all users.
+ (padlockkey): New macro. Change all users of padlock_key.
+ * cipher.c (NEED_16BYTE_ALIGNED_CONTEXT): Always define if using gcc.
+ (struct gcry_cipher_handle): Align U_IV to at least 16 byte.
+
+2011-02-13 Werner Koch <wk@g10code.com>
+
+ * rijndael.c (USE_AESNI): New. Define for ia32 and gcc >= 4.
+ (m128i_t) [USE_AESNI]: New.
+ (RIJNDAEL_context) [USE_AESNI]: Add field use_aesni.
+ (do_setkey): Set USE_AESNI for all key lengths.
+ (prepare_decryption) [USE_AESNI]: Use the aesimc instruction if requested.
+ (do_aesni_enc_aligned, do_aesni_dec_aligned)
+ (do_aesni) [USE_AESNI]: New.
+ (rijndael_encrypt, _gcry_aes_cfb_enc, _gcry_aes_cbc_enc)
+ (rijndael_decrypt, _gcry_aes_cfb_dec)
+ (_gcry_aes_cbc_dec) [USE_AESNI]: Use do_aesni.
+
+2011-02-01 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (gcry_pk_get_curve): New.
+ (sexp_to_key): Add arg OVERRIDE_ELEMS.
+ (sexp_elements_extract_ecc): Allow for params only.
+ (gcry_pk_get_param): New.
+ * ecc.c (ecc_get_curve): New.
+ (ecc_get_param_sexp): New.
+
+2011-01-28 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (gcry_pk_genkey): Hack to insert the used curve name.
+
+2011-01-27 Werner Koch <wk@g10code.com>
+
+ * ecc.c (fill_in_curve): Remove.
+ (generate_curve): Rename to ..
+ (fill_in_curve): this. Remove setting of NAME_OID.
+ (ecc_encrypt_raw): Change name of arg DATA to K for better
+ readability. Use ECC_public_key instead of ECC_secret_key.
+ Require a caller to pass a complete pkey array.
+ (ecc_decrypt_raw): Require a caller to pass a complete skey array.
+ (elliptic_curve_t): Add field NAME.
+ (fill_in_curve): Set field.
+ (generate_key): Add arg R_USED_CURVE.
+ (ecc_generate_ext): Return used curve name.
+
+2011-01-13 Andrey Jivsov <openpgp@brainhub.org> (wk)
+
+ * ecc.c (ec2os): Do not free passed parameters X and Y. Adjust
+ callers.
+ (ecc_encrypt_raw, ecc_decrypt_raw): New.
+ (ecdh_names, _gcry_pubkey_spec_ecdh): New.
+ * pubkey.c (pubkey_table): Support ECDH.
+
+2010-08-19 Werner Koch <wk@g10code.com>
+
+ * cipher.c (gcry_cipher_open): Remove double release of the module.
+ Fixes bug#1263.
+
+2010-06-10 Jeff Johnson <n3npq@mac.com> (wk)
+
+ * ecc.c (ecc_generate_ext): Parse transient-key flag.
+ (generate_key): Add arg TRANSIENT_KEY and use it to set the random
+ level.
+
+2010-04-12 Brad Hards <bradh@frogmouth.net> (wk)
+
+ Spelling fixes.
+
+2010-03-26 Werner Koch <wk@g10code.com>
+
+ * tiger.c (asn): Unfetter the old TIGER from an OID.
+ (TIGER_CONTEXT): Add field VARIANT.
+ (tiger_init): Factor code out to ...
+ (do_init): New.
+ (tiger1_init, tiger2_init): New.
+ (_gcry_digest_spec_tiger1, _gcry_digest_spec_tiger2): New.
+ * md.c (digest_table): Add TIGER1 and TIGER2 variants.
+
+2009-12-11 Werner Koch <wk@g10code.com>
+
+ * sha256.c (Cho, Maj, Sum0, Sum1): Turn macros into inline
+ functions.
+ (transform): Partly unroll to interweave the chain variables
+
+ * sha512.c (ROTR, Ch, Maj, Sum0, Sum1): Turn macros into inline
+ functions.
+ (transform): Partly unroll to interweave the chain variables.
+ Suggested by Christian Grothoff.
+
+2009-12-10 Werner Koch <wk@g10code.com>
+
+ * Makefile.am (o_flag_munging): New.
+ (tiger.o, tiger.lo): Use it.
+
+ * cipher.c (do_ctr_encrypt): Add arg OUTBUFLEN. Check for
+ suitable value. Add check for valid inputlen. Wipe temporary
+ memory.
+ (do_ctr_decrypt): Likewise.
+ (do_cbc_encrypt, do_cbc_decrypt): Add arg OUTBUFLEN. Check for
+ suitable value. Move check for valid inputlen to here; change
+ returned error from INV_ARG to INV_LENGTH.
+ (do_ecb_encrypt, do_ecb_decrypt): Ditto.
+ (do_cfb_encrypt, do_cfb_decrypt): Ditto.
+ (do_ofb_encrypt, do_ofb_decrypt): Ditto.
+ (cipher_encrypt, cipher_decrypt): Adjust for above changes.
+ (gcry_cipher_encrypt, gcry_cipher_decrypt): Simplify.
+
+2009-12-09 Werner Koch <wk@g10code.com>
+
+ * cipher.c (gcry_cipher_open): Allow for GCRY_CIPHER_MODE_AESWRAP.
+ (cipher_encrypt, cipher_decrypt): Ditto.
+ (do_aeswrap_encrypt, do_aeswrap_decrypt): New.
+ (struct gcry_cipher_handle): Add field marks.
+ (cipher_setkey, cipher_setiv): Update marks flags.
+ (cipher_reset): Reset marks.
+ (cipher_encrypt, cipher_decrypt): Add new arg OUTBUFLEN.
+ (gcry_cipher_encrypt, gcry_cipher_decrypt): Pass outbuflen to
+ cipher_encrypt. Replace GPG_ERR_TOO_SHORT by
+ GPG_ERR_BUFFER_TOO_SHORT.
+
+2009-08-21 Werner Koch <wk@g10code.com>
+
+ * dsa.c (dsa_generate_ext): Release retfactors array before
+ setting it to NULL. Reported by Daiki Ueno.
+
+2009-07-02 Werner Koch <wk@g10code.com>
+
+ * md.c (md_read): Fix incomplete check for NULL.
+ Reported by Fabian Kail.
+
+2009-03-31 Werner Koch <wk@g10code.com>
+
+ * rsa.c (rsa_check_secret_key): Return GPG_ERR_BAD_SECKEY and not
+ GPG_ERR_PUBKEY_ALGO.
+
+2009-02-16 Werner Koch <wk@g10code.com>
+
+ * rsa.c (generate_x931): Do not initialize TBL with automatic
+ variables.
+ * whirlpool.c, tiger.c, sha256.c, sha1.c, rmd160.c, md5.c
+ * md4.c, crc.c: Remove memory.h. This is garbage from gnupg.
+ Reported by Dan Fandrich.
+
+2009-01-22 Werner Koch <wk@g10code.com>
+
+ * ecc.c (compute_keygrip): Remove superfluous const.
+
+2009-01-06 Werner Koch <wk@g10code.com>
+
+ * rmd160.c (oid_spec_rmd160): Add TeleTrust identifier.
+
+2008-12-10 Werner Koch <wk@g10code.com>
+
+ * dsa.c (generate): Add arg DOMAIN and use it if specified.
+ (generate_fips186): Ditto.
+ (dsa_generate_ext): Parse and check the optional "domain"
+ parameter and pass them to the generate functions.
+
+ * rijndael.c (rijndael_names): Add "AES128" and "AES-128".
+ (rijndael192_names): Add "AES-192".
+ (rijndael256_names): Add "AES-256".
+
+2008-12-05 Werner Koch <wk@g10code.com>
+
+ * dsa.c (generate): Add arg TRANSIENT_KEY and use it to determine
+ the RNG quality needed.
+ (dsa_generate_ext): Parse the transient-key flag and pass it to
+ generate.
+
+2008-11-28 Werner Koch <wk@g10code.com>
+
+ * dsa.c (generate_fips186): Add arg DERIVEPARMS and use the seed
+ value if available.
+
+ * primegen.c (_gcry_generate_fips186_2_prime): Fix inner p loop.
+
+2008-11-26 Werner Koch <wk@g10code.com>
+
+ * primegen.c (_gcry_generate_fips186_3_prime): New.
+ * dsa.c (generate_fips186): Add arg USE_FIPS186_2.
+ (dsa_generate_ext): Parse new flag use-fips186-2.
+
+2008-11-25 Werner Koch <wk@g10code.com>
+
+ * dsa.c (generate_fips186): New.
+ (dsa_generate_ext): Use new function if derive-parms are given or
+ if in FIPS mode.
+ * primegen.c (_gcry_generate_fips186_2_prime): New.
+
+2008-11-24 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (gcry_pk_genkey): Insert code to output extrainfo.
+ (pubkey_generate): Add arg R_EXTRAINFO and pass it to the extended
+ key generation function.
+ * rsa.c (gen_x931_parm_xp, gen_x931_parm_xi): New.
+ (generate_x931): Generate params if not given.
+ (rsa_generate_ext): Parse use-x931 flag. Return p-q-swapped
+ indicator.
+ * dsa.c (dsa_generate_ext): Put RETFACTORS into R_EXTRAINFO if
+ possible.
+
+ * pubkey.c (gcry_pk_genkey): Remove parsing of almost all
+ parameters and pass the parameter S-expression to pubkey_generate.
+ (pubkey_generate): Simplify by requiring modules to parse the
+ parameters. Remove the special cases for Elgamal and ECC.
+ (sexp_elements_extract_ecc): Add arg EXTRASPEC and use it. Fix
+ small memory leak.
+ (sexp_to_key): Pass EXTRASPEC to sexp_elements_extract_ecc.
+ (pubkey_table) [USE_ELGAMAL]: Add real extraspec.
+ * rsa.c (rsa_generate_ext): Adjust for new calling convention.
+ * dsa.c (dsa_generate_ext): Ditto.
+ * elgamal.c (_gcry_elg_generate): Ditto. Rename to elg_generate_ext.
+ (elg_generate): New.
+ (_gcry_elg_generate_using_x): Remove after merging code with
+ elg_generate_ext.
+ (_gcry_pubkey_extraspec_elg): New.
+ (_gcry_elg_check_secret_key, _gcry_elg_encrypt, _gcry_elg_sign)
+ (_gcry_elg_verify, _gcry_elg_get_nbits): Make static and remove
+ _gcry_ prefix.
+ * ecc.c (_gcry_ecc_generate): Rename to ecc_generate_ext and
+ adjust for new calling convention.
+ (_gcry_ecc_get_param): Rename to ecc_get_param and make static.
+ (_gcry_pubkey_extraspec_ecdsa): Add ecc_generate_ext and
+ ecc_get_param.
+
+2008-11-20 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (pubkey_generate): Add arg DERIVEPARMS.
+ (gcry_pk_genkey): Parse derive-parms and pass it to above.
+ * rsa.c (generate_x931): New.
+ (rsa_generate_ext): Add arg DERIVEPARMS and call new function in
+ fips mode or if DERIVEPARMS is given.
+ * primegen.c (_gcry_derive_x931_prime, find_x931_prime): New.
+
+2008-11-19 Werner Koch <wk@g10code.com>
+
+ * rsa.c (rsa_decrypt): Use gcry_create_nonce for blinding.
+ (generate): Rename to generate_std.
+
+2008-11-05 Werner Koch <wk@g10code.com>
+
+ * md.c (md_open): Use a switch to set the Bsize.
+ (prepare_macpads): Fix long key case for SHA384 and SHA512.
+
+ * cipher.c (gcry_cipher_handle): Add field EXTRASPEC.
+ (gcry_cipher_open): Set it.
+ (gcry_cipher_ctl): Add private control code to disable weak key
+ detection and to return the current input block.
+ * des.c (_tripledes_ctx): Add field FLAGS.
+ (do_tripledes_set_extra_info): New.
+ (_gcry_cipher_extraspec_tripledes): Add new function.
+ (do_tripledes_setkey): Disable weak key detection.
+
+2008-10-24 Werner Koch <wk@g10code.com>
+
+ * md.c (digest_table): Allow MD5 in fips mode.
+ (md_register_default): Take special action for MD5.
+ (md_enable, gcry_md_hash_buffer): Ditto.
+
+2008-09-30 Werner Koch <wk@g10code.com>
+
+ * rijndael.c (do_setkey): Properly align "t" and "tk".
+ (prepare_decryption): Properly align "w". Fixes bug #936.
+
+2008-09-18 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (gcry_pk_genkey): Parse domain parameter.
+ (pubkey_generate): Add new arg DOMAIN and remove special case for
+ DSA with qbits.
+ * rsa.c (rsa_generate): Add dummy args QBITS, NAME and DOMAIN and
+ rename to rsa_generate_ext. Change caller.
+ (_gcry_rsa_generate, _gcry_rsa_check_secret_key)
+ (_gcry_rsa_encrypt, _gcry_rsa_decrypt, _gcry_rsa_sign)
+ (_gcry_rsa_verify, _gcry_rsa_get_nbits): Make static and remove
+ _gcry_ prefix.
+ (_gcry_pubkey_spec_rsa, _gcry_pubkey_extraspec_rsa): Adjust names.
+ * dsa.c (dsa_generate_ext): New.
+ (_gcry_dsa_generate): Replace code by a call to dsa_generate.
+ (_gcry_dsa_check_secret_key, _gcry_dsa_sign, _gcry_dsa_verify)
+ (_gcry_dsa_get_nbits): Make static and remove _gcry prefix.
+ (_gcry_dsa_generate2): Remove.
+ (_gcry_pubkey_spec_dsa): Adjust to name changes.
+ (_gcry_pubkey_extraspec_rsa): Add dsa_generate_ext.
+
+2008-09-16 Werner Koch <wk@g10code.com>
+
+ * ecc.c (run_selftests): Add arg EXTENDED.
+
+2008-09-12 Werner Koch <wk@g10code.com>
+
+ * rsa.c (test_keys): Do a bad case signature check.
+ * dsa.c (test_keys): Do a bad case check.
+
+ * cipher.c (_gcry_cipher_selftest): Add arg EXTENDED and pass it
+ to the called tests.
+ * md.c (_gcry_md_selftest): Ditto.
+ * pubkey.c (_gcry_pk_selftest): Ditto.
+ * rijndael.c (run_selftests): Add arg EXTENDED and pass it to the
+ called tests.
+ (selftest_fips_128): Add arg EXTENDED and run only one test
+ in non-extended mode.
+ (selftest_fips_192): Add dummy arg EXTENDED.
+ (selftest_fips_256): Ditto.
+ * hmac-tests.c (_gcry_hmac_selftest): Ditto.
+ (run_selftests): Ditto.
+ (selftests_sha1): Add arg EXTENDED and run only one test
+ in non-extended mode.
+ (selftests_sha224, selftests_sha256): Ditto.
+ (selftests_sha384, selftests_sha512): Ditto.
+ * sha1.c (run_selftests): Add arg EXTENDED and pass it to the
+ called test.
+ (selftests_sha1): Add arg EXTENDED and run only one test
+ in non-extended mode.
+ * sha256.c (run_selftests): Add arg EXTENDED and pass it to the
+ called tests.
+ (selftests_sha224): Add arg EXTENDED and run only one test
+ in non-extended mode.
+ (selftests_sha256): Ditto.
+ * sha512.c (run_selftests): Add arg EXTENDED and pass it to the
+ called tests.
+ (selftests_sha384): Add arg EXTENDED and run only one test
+ in non-extended mode.
+ (selftests_sha512): Ditto.
+ * des.c (run_selftests): Add arg EXTENDED and pass it to the
+ called test.
+ (selftest_fips): Add dummy arg EXTENDED.
+ * rsa.c (run_selftests): Add dummy arg EXTENDED.
+
+ * dsa.c (run_selftests): Add dummy arg EXTENDED.
+
+ * rsa.c (extract_a_from_sexp): New.
+ (selftest_encr_1024): Check that the ciphertext does not match the
+ plaintext.
+ (test_keys): Improve tests and return an error status.
+ (generate): Return an error if test_keys fails.
+ * dsa.c (test_keys): Add comments and return an error status.
+ (generate): Return an error if test_keys failed.
+
+2008-09-11 Werner Koch <wk@g10code.com>
+
+ * rsa.c (_gcry_rsa_decrypt): Return an error instead of calling
+ BUG in case of a practically impossible condition.
+ (sample_secret_key, sample_public_key): New.
+ (selftest_sign_1024, selftest_encr_1024): New.
+ (selftests_rsa): Implement tests.
+ * dsa.c (sample_secret_key, sample_public_key): New.
+ (selftest_sign_1024): New.
+ (selftests_dsa): Implement tests.
+
+2008-09-09 Werner Koch <wk@g10code.com>
+
+ * hmac-tests.c (selftests_sha1): Add tests.
+ (selftests_sha224, selftests_sha384, selftests_sha512): Make up tests.
+
+ * hash-common.c, hash-common.h: New.
+ * sha1.c (selftests_sha1): Add 3 tests.
+ * sha256.c (selftests_sha256, selftests_sha224): Ditto.
+ * sha512.c (selftests_sha512, selftests_sha384): Ditto.
+
+2008-08-29 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (gcry_pk_get_keygrip): Remove the special case for RSA
+ and check whether a custom computation function has been setup.
+ * rsa.c (compute_keygrip): New.
+ (_gcry_pubkey_extraspec_rsa): Setup this function.
+ * ecc.c (compute_keygrip): New.
+ (_gcry_pubkey_extraspec_ecdsa): Setup this function.
+
+2008-08-28 Werner Koch <wk@g10code.com>
+
+ * cipher.c (cipher_decrypt, cipher_encrypt): Return an error if
+ mode NONE is used.
+ (gcry_cipher_open): Allow mode NONE only with a debug flag set and
+ if not in FIPS mode.
+
+2008-08-26 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (pubkey_generate): Add arg KEYGEN_FLAGS.
+ (gcry_pk_genkey): Implement new parameter "transient-key" and
+ pass it as flags to pubkey_generate.
+ (pubkey_generate): Make use of an ext_generate function.
+ * rsa.c (generate): Add new arg transient_key and pass appropriate
+ args to the prime generator.
+ (_gcry_rsa_generate): Factor all code out to ...
+ (rsa_generate): .. new func with extra arg KEYGEN_FLAGS.
+ (_gcry_pubkey_extraspec_rsa): Setup rsa_generate.
+ * primegen.c (_gcry_generate_secret_prime)
+ (_gcry_generate_public_prime): Add new arg RANDOM_LEVEL.
+
+2008-08-21 Werner Koch <wk@g10code.com>
+
+ * primegen.c (_gcry_generate_secret_prime)
+ (_gcry_generate_public_prime): Use a constant macro for the random
+ level.
+
+2008-08-19 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (sexp_elements_extract_ecc) [!USE_ECC]: Do not allow
+ the "curve" parameter.
+
+2008-08-15 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (_gcry_pk_selftest): New.
+ * dsa.c (selftests_dsa, run_selftests): New.
+ * rsa.c (selftests_rsa, run_selftests): New.
+ * ecc.c (selftests_ecdsa, run_selftests): New.
+
+ * md.c (_gcry_md_selftest): New.
+ * sha1.c (run_selftests, selftests_sha1): New.
+ * sha256.c (selftests_sha224, selftests_sha256, run_selftests): New.
+ * sha512.c (selftests_sha384, selftests_sha512, run_selftests): New.
+
+ * des.c (selftest): Remove static variable from selftest.
+ (des_setkey): No on-the-fly self test in fips mode.
+ (tripledes_set3keys): Ditto.
+
+ * cipher.c (_gcry_cipher_setkey, _gcry_cipher_setiv):
+
+ * dsa.c (generate): Bail out in fips mode if NBITS is less than 1024.
+ * rsa.c (generate): Return an error code if the requested size
+ is less than 1024 and we are in fips mode.
+ (_gcry_rsa_generate): Take care of that error code.
+
+ * ecc.c (generate_curve): In fips mode enable only NIST curves.
+
+ * cipher.c (_gcry_cipher_selftest): New.
+
+ * sha512.c (_gcry_digest_extraspec_sha384)
+ (_gcry_digest_extraspec_sha512): New.
+ * sha256.c (_gcry_digest_extraspec_sha224)
+ (_gcry_digest_extraspec_sha256): New.
+ * sha1.c (_gcry_digest_extraspec_sha1): New.
+ * ecc.c (_gcry_pubkey_extraspec_ecdsa): New.
+ * dsa.c (_gcry_pubkey_extraspec_dsa): New.
+ * rsa.c (_gcry_pubkey_extraspec_rsa): New.
+ * rijndael.c (_gcry_cipher_extraspec_aes)
+ (_gcry_cipher_extraspec_aes192, _gcry_cipher_extraspec_aes256): New.
+ * des.c (_gcry_cipher_extraspec_tripledes): New.
+
+ * cipher.c (gcry_cipher_register): Rename to _gcry_cipher_register.
+ Add arg EXTRASPEC.
+ (dummy_extra_spec): New.
+ (cipher_table_entry): Add extraspec field.
+ * md.c (gcry_md_register): Rename to _gcry_md_register. Add
+ arg EXTRASPEC.
+ (dummy_extra_spec): New.
+ (digest_table_entry): Add extraspec field.
+ * pubkey.c (gcry_pk_register): Rename to _gcry_pk_register. Add
+ arg EXTRASPEC.
+ (dummy_extra_spec): New.
+ (pubkey_table_entry): Add extraspec field.
+
+ * ac.c: Let most public functions return GPG_ERR_UNSUPPORTED in
+ fips mode.
+
+ * pubkey.c (pubkey_table_entry): Add field FIPS_ALLOWED and mark
+ appropriate algorithms.
+ (dummy_generate, dummy_check_secret_key, dummy_encrypt)
+ (dummy_decrypt, dummy_sign, dummy_verify, dummy_get_nbits): Signal
+ a fips error when used.
+ (gcry_pk_register): In fips mode do not allow to register new
+ algorithms.
+
+ * md.c (digest_table): Add field FIPS_ALLOWED and mark appropriate
+ algorithms.
+ (md_register_default): In fips mode register only fips algorithms.
+ (gcry_md_register): In fips mode do not allow to register new
+ algorithms.
+ (gcry_md_get): Signal a fips error if called.
+ (gcry_md_hash_buffer): Do not allow rmd160 when not in fips mode.
+ (md_start_debug): Disable in fips_mode.
+
+ * md.c (gcry_md_register_default): Rename to ..
+ (md_register_default): .. this.
+ (md_digest): Remove this commented function.
+ * pubkey.c (gcry_pk_register_default): Rename to ..
+ (pk_register_default): .. this.
+
+ * cipher.c (cipher_table_entry): Add field FIPS_ALLOWED.
+ (gcry_cipher_register_default): Register only fips approved
+ algorithms.
+ (gcry_cipher_register): Do not allow to register new ciphers.
+ (cipher_setiv): Signal fips error.
+
+ * cipher.c (gcry_cipher_register_default): Rename to ..
+ (cipher_register_default): .. this.
+ (REGISTER_DEFAULT_CIPHERS): Adjust for that change.
+
+2008-07-05 Werner Koch <wk@g10code.com>
+
+ * random-daemon.c, random.h, rndhw.c, rndunix.c, rand-internal.h
+ * random.c, rndegd.c, rndlinux.c, rndw32.c: Move to ../cipher.
+ * Makefile.am: Remove random stuff.
+
+2008-06-24 Szakats Istvan <szaki.ms@gmail.com> (wk)
+
+ * ac.c (_gcry_ac_key_destroy, _gcry_ac_key_pair_generate): Release
+ some more memory.
+
+2008-04-22 Werner Koch <wk@g10code.com>
+
+ * rijndael.c (_gcry_aes_cfb_enc, _gcry_aes_cbc_enc)
+ (_gcry_aes_cfb_dec, _gcry_aes_cbc_dec): Use Padlock if possible.
+
+2008-04-18 Werner Koch <wk@g10code.com>
+
+ * sha1.c (transform_aligned): Remove. That will obviously not
+ work because we need a scratch working area and our internal API
+ does not allow modifying the buffers.
+
+ * rijndael.c: Factor tables out to ..
+ * rijndael-tables.h: .. new.
+
+ * ac.c (ac_data_extract): Make static.
+
+ * camellia.h [HAVE_CONFIG_H]: Include config.h.
+
+ * rndw32.c (registry_poll): Only print the performance data
+ problem warning once. Suggested by Simon Josefsson.
+
+2008-03-19 Werner Koch <wk@g10code.com>
+
+ * cipher.c (gcry_cipher_open) [USE_AES]: Init bulk encryption only
+ if requested. Suggested by Dirk Stoecker.
+
+2008-03-18 Werner Koch <wk@g10code.com>
+
+ * sha1.c: Include stdint.h.
+ (transform): Add arg NBLOCKS so that we can work on more than one
+ block and avoid updates of the chaining variables. Changed all
+ callers to use 1.
+ (sha1_write): Replace loop around transform.
+ (transform_aligned) [WORDS_BIGENDIAN]: New.
+ (TRANSFORM): New macro to replace all direct calls of transform.
+
+2008-03-17 Werner Koch <wk@g10code.com>
+
+ * rijndael.c (_gcry_aes_cfb_dec): New.
+ (do_encrypt): Factor code out to ..
+ (do_encrypt_aligned): .. New.
+ (_gcry_aes_cfb_enc, _gcry_aes_cfb_dec): Use new function.
+ (do_decrypt): Factor code out to ..
+ (do_decrypt_aligned): .. new.
+ (_gcry_aes_cbc_enc, _gcry_aes_cbc_dec): New.
+ * cipher.c (struct gcry_cipher_handle): Put field IV into new
+ union U_IV to enforce proper alignment. Change all users.
+ (do_cfb_decrypt): Optimize.
+ (do_cbc_encrypt, do_cbc_decrypt): Optimize.
+
+2008-03-15 Werner Koch <wk@g10code.com>
+
+ * rijndael.c (_gcry_aes_cfb_enc): New.
+ * cipher.c (struct gcry_cipher_handle): Add field ALGO and BULK.
+ (gcry_cipher_open): Set ALGO and BULK.
+ (do_cfb_encrypt): Optimize.
+
+2008-02-18 Werner Koch <wk@g10code.com>
+
+ * rsa.c (_gcry_rsa_verify) [IS_DEVELOPMENT_VERSION]: Print
+ intermediate results.
+
+2008-01-08 Werner Koch <wk@g10code.com>
+
+ * random.c (add_randomness): Do not just increment
+ POOL_FILLED_COUNTER but update it by the actual amount of data.
+
+2007-12-13 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (sexp_data_to_mpi): Support SHA-224.
+
+2007-12-05 Werner Koch <wk@g10code.com>
+
+ * rijndael.c (USE_PADLOCK): Depend on ENABLE_PADLOCK_SUPPORT.
+ * rndhw.c (USE_PADLOCK): Ditto
+
+ * rsa.c (secret): Fixed condition test for using CRT. Reported by
+ Dean Scarff. Fixes bug#864.
+ (_gcry_rsa_check_secret_key): Return an error if the optional
+ parameters are missing.
+ * pubkey.c (sexp_elements_extract): Add arg ALGO_NAME. Changed all
+ callers to pass NULL. Add hack to allow for optional RSA
+ parameters.
+ (sexp_to_key): Pass algo name to sexp_elements_extract.
+
+2007-12-03 Werner Koch <wk@g10code.com>
+
+ * random.c (gcry_random_add_bytes): Implement it.
+ * rand-internal.h (RANDOM_ORIGIN_EXTERNAL): New.
+
+2007-11-30 Werner Koch <wk@g10code.com>
+
+ * rndhw.c: New.
+ * rndlinux.c (_gcry_rndlinux_gather_random): Try to read 50%
+ directly from the hwrng.
+ * random.c (do_fast_random_poll): Also run the hw rng fast poll.
+ (_gcry_random_dump_stats): Tell whether the hw rng failed.
+
+2007-11-29 Werner Koch <wk@g10code.com>
+
+ * rijndael.c (USE_PADLOCK): Define new macro used for ia32.
+ (RIJNDAEL_context) [USE_PADLOCK]: Add fields USE_PADLOCK and
+ PADLOCK_KEY.
+ (do_setkey) [USE_PADLOCK]: Enable padlock if available for 128 bit
+ AES.
+ (do_padlock) [USE_PADLOCK]: New.
+ (rijndael_encrypt, rijndael_decrypt) [USE_PADLOCK]: Divert to
+ do_padlock.
+ * cipher.c (cipher_context_alignment_t): New. Use it in this
+ module in place of PROPERLY_ALIGNED_TYPE.
+ (NEED_16BYTE_ALIGNED_CONTEXT): Define macro for ia32.
+ (struct gcry_cipher_handle): Add field HANDLE_OFFSET.
+ (gcry_cipher_open): Take care of increased alignment requirements.
+ (gcry_cipher_close): Ditto.
+
+2007-11-28 Werner Koch <wk@g10code.com>
+
+ * sha256.c (asn224): Fixed wrong template. It happened due to a
+ bug in RFC4880. SHA-224 is not in the stable version of libgcrypt
+ so the consequences are limited to users of this devel version.
+
+2007-10-31 Werner Koch <wk@g10code.com>
+
+ * ac.c (gcry_ac_data_new): Remove due to the visibility wrapper.
+ (gcry_ac_data_destroy, gcry_ac_data_copy, gcry_ac_data_length)
+ (gcry_ac_data_set, gcry_ac_data_get_name, gcry_ac_data_get_index)
+ (gcry_ac_data_to_sexp, gcry_ac_data_from_sexp)
+ (gcry_ac_data_clear, gcry_ac_io_init, gcry_ac_open)
+ (gcry_ac_close, gcry_ac_key_init, gcry_ac_key_pair_generate)
+ (gcry_ac_key_pair_extract, gcry_ac_key_destroy)
+ (gcry_ac_key_pair_destroy, gcry_ac_key_data_get)
+ (gcry_ac_key_test, gcry_ac_key_get_nbits, gcry_ac_key_get_grip)
+ (gcry_ac_data_encrypt, gcry_ac_data_decrypt, gcry_ac_data_sign)
+ (gcry_ac_data_verify, gcry_ac_data_encode, gcry_ac_data_decode)
+ (gcry_ac_mpi_to_os, gcry_ac_mpi_to_os_alloc, gcry_ac_os_to_mpi)
+ (gcry_ac_data_encrypt_scheme, gcry_ac_data_decrypt_scheme)
+ (gcry_ac_data_sign_scheme, gcry_ac_data_verify_scheme)
+ (gcry_ac_io_init_va): Ditto.
+ (gcry_ac_id_to_name, gcry_ac_name_to_id): Remove as these
+ deprecated functions are now implemented by visibility.c.
+
+2007-10-26 Werner Koch <wk@g10code.com>
+
+ * rndw32.c: Disable debug flag.
+
+2007-10-25 Werner Koch <wk@g10code.com>
+
+ * rndw32.c: Updated from current cryptlib snapshot and modified
+ for our use. Removed support for pre-NT systems.
+ (slow_gatherer_windows95): Remove.
+ (_gcry_rndw32_gather_random): Require an NT platform.
+ (init_system_rng, read_system_rng, read_mbm_data): New.
+ (slow_gatherer_windowsNT): Rename to ...
+ (slow_gatherer): .. this. Read system RNG and MBM.
+ (registry_poll): New with code factored out from slow_gatherer.
+
+2007-08-23 Werner Koch <wk@g10code.com>
+
+ * random.c (pool_filled_counter): New.
+ (add_randomness): Use it.
+
+2007-08-22 Werner Koch <wk@g10code.com>
+
+ * rndw32.c, rndunix.c: Switched to LGPL.
+
+2007-05-30 Werner Koch <wk@g10code.com>
+
+ * camellia.h, camellia.c: Replace by new LGPL version and adjusted
+ camellia.h.
+
+2007-05-09 Marcus Brinkmann <marcus@g10code.de>
+
+ * ac.c (_gcry_ac_io_init_va, _gcry_ac_io_write, _gcry_ac_io_read):
+ Adjust users of gcry_ac_io_t because union is not anonymous
+ anymore.
+
+2007-05-02 Werner Koch <wk@g10code.com>
+
+ * camellia-glue.c (camellia_setkey, camellia_encrypt)
+ (camellia_decrypt): Recalculated used stack size in called
+ functions.
+ * camellia.h: Redefine external symbols.
+
+2007-05-02 David Shaw <dshaw@jabberwocky.com>
+
+ * Makefile.am, cipher.c: Add Camellia.
+
+ * camellia-glue.c: New. The necessary glue to interface libgcrypt
+ to the stock NTT Camellia distribution.
+
+ * camellia.h, camellia.c: The stock NTT Camellia distribution
+ (GPL).
+
+2007-04-30 David Shaw <dshaw@jabberwocky.com>
+
+ * cipher.c: Use #if instead of #ifdef as configure defines the
+ USE_cipher defines as 0 for disabled.
+
+2007-04-30 Werner Koch <wk@g10code.com>
+
+ * rndegd.c (_gcry_rndegd_set_socket_name): New.
+
+2007-04-30 Marcus Brinkmann <marcus@g10code.de>
+
+ * ecc.c (ec2os): Fix relocation of short numbers.
+
+ * ecc.c (generate_key): Do not allocate D, which will be allocated
+ by GEN_K. Remove G. Fix test if g_x, g_y resp. q_x, q_y are
+ requested.
+ (_gcry_ecc_generate): Release unneeded members of SK.
+ * pubkey.c (sexp_to_key): Release NAME.
+
+2007-04-28 Marcus Brinkmann <marcus@g10code.de>
+
+ * ac.c (gcry_ac_mpi): Remove member NAME_PROVIDED.
+ (ac_data_mpi_copy, _gcry_ac_data_set, _gcry_ac_data_get_name)
+ (_gcry_ac_data_get_index, ac_data_construct): Adjust handling of
+ NAME accordingly.
+
+2007-04-20 Werner Koch <wk@g10code.com>
+
+ * ecc.c (domain_parms): Add standard brainpool curves.
+
+2007-04-18 Werner Koch <wk@g10code.com>
+
+ * ecc.c (generate_curve): Implement alias mechanism.
+
+ * pubkey.c (sexp_elements_extract_ecc): New.
+ (sexp_to_key): Add special case for ecc.
+ (sexp_to_key, sexp_to_sig, sexp_to_enc, gcry_pk_genkey): Replace
+ name_terminated stuff by a call to _gcry_sexp_nth_string.
+ (gcry_pk_get_keygrip): Ditto.
+
+2007-04-16 Werner Koch <wk@g10code.com>
+
+ * ecc.c (_gcry_ecc_generate): Renamed DUMMY to CURVE and use it.
+
+2007-04-13 Marcus Brinkmann <marcus@g10code.de>
+
+ * ac.c (ac_data_construct): Cast const away to suppress compiler
+ warning.
+
+ * ecc.c (ecc_generate): Avoid compiler warning for unused argument
+ DUMMY.
+ (ecc_verify): Avoid compiler warning for unused arguments CMP and
+ OPAQUEV.
+
+2007-04-06 Werner Koch <wk@g10code.com>
+
+ * sha1.c (oid_spec_sha1): Add another oid from X9.62.
+
+2007-03-28 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (gcry_pk_genkey): Do not issue misc-key-info if it is
+ empty.
+ (gcry_pk_genkey): New parameter "curve".
+
+ * ecc.c: Entirely rewritten with only a few traces of the old
+ code left.
+ (_gcry_ecc_generate): New.
+ (generate_key) New arg NAME.
+ (generate_curve): Ditto. Return actual number of NBITS.
+
+2007-03-26 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (gcry_pk_genkey): Increase size of SKEY array and add a
+ runtime bounds check.
+
+2007-03-23 Werner Koch <wk@g10code.com>
+
+ * ecc.c (ecc_ctx_init, ecc_ctx_free, ecc_mod, ecc_mulm): New.
+ (duplicate_point, sum_points, escalar_mult): Don't use a
+ copy of base->p. Replaced all mpi_mulm by ecc_mulm so that we can
+ experiment with different algorithms.
+ (generate_key, check_secret_key, sign, verify): Initialize a
+ computation context for use by ecc_mulm.
+
+2007-03-22 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (pubkey_table): Initialize ECC.
+ * Makefile.am (EXTRA_libcipher_la_SOURCES): Add ecc.c.
+ * ecc.c: New. Heavily reformatted and changed for use in libgcrypt.
+ (point_init): New.
+ (escalar_mult): Make arg R the first arg to be similar to the mpi
+ functions.
+ (duplicate_point): Ditto
+ (sum_points): Ditto
+ (sign, verify): Remove unneeded copy operations.
+ (sum_points): Removed memory leaks and optimized some compares.
+ (verify): Simplified input check.
+
+2007-03-14 Werner Koch <wk@g10code.com>
+
+ * random.c (MASK_LEVEL): Removed macro as it was used only at one
+ place. Open coded it there.
+ (gcry_randomize, _gcry_update_random_seed_file)
+ (_gcry_fast_random_poll): Factor lock code out to ..
+ (lock_pool, unlock_pool): .. new.
+ (initialize): Lock the pool while allocating.
+ (read_random_source, do_fast_random_poll): Moved initialization to ...
+ (initialize): .. here.
+ (_gcry_enable_quick_random_gen): No more need for initialization.
+ (is_initialized): Moved this global flag to ..
+ (initialize): .. here and changed all users to unconditionally call
+ initialize.
+ (add_randomness): Remove initialization here. It simply can't
+ happen.
+
+ * random.c (enum random_origins): Moved to ..
+ * rand-internal.h: .. here.
+ * rndunix.c (_gcry_rndunix_gather_random): Use enum in prototype
+ for ORIGIN and renamed REQUESTOR to ORIGIN.
+ * rndegd.c (_gcry_rndegd_gather_random): Ditto.
+ * rndlinux.c (_gcry_rndlinux_gather_random): Ditto.
+ * rndw32.c (_gcry_rndw32_gather_random): Ditto.
+ (_gcry_rndw32_gather_random_fast): Ditto.
+
+2007-03-13 Werner Koch <wk@g10code.com>
+
+ * random.c (enum random_origins): New.
+ (add_randomness): Renamed arg SOURCE to ORIGIN.
+ (read_random_source): Renamed arg REQUESTOR to ORIGIN.
+ (getfnc_gather_random): Removed static variable because this
+ function is only called once and thus we don't need this
+ optimization.
+ (_gcry_quick_random_gen): Removed and replaced by..
+ (_gcry_enable_quick_random_gen): .. this. It is only used to
+ enable it and it does not make sense to disable it later. Changed
+ the only caller too.
+ (get_random_bytes): Removed.
+ (gcry_random_bytes, gcry_random_bytes_secure): Implement in terms
+ of gcry_randomize.
+ * random-daemon.c (_gcry_daemon_get_random_bytes): Removed.
+
+2007-02-23 Werner Koch <wk@g10code.com>
+
+ * elgamal.c (generate): Removed unused variable TEMP.
+ (test_keys): New arg NODIE.
+ (generate_using_x, _gcry_elg_generate_using_x): New.
+ * pubkey.c (pubkey_generate): New arg XVALUE and direct call to
+ the new elgamal generate function.
+ (gcry_pk_genkey): Parse the new "xvalue" tag.
+
+2007-02-22 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (sexp_data_to_mpi): Handle dynamically allocated
+ algorithms. Suggested by Neil Dunbar. Fixes bug#596.
+
+ * rndw32.c (_gcry_rndw32_gather_random_fast): Make it return void.
+
+ * cipher.c (gcry_cipher_algo_name): Simplified.
+
+ * random.c: Use the daemon only if compiled with USE_RANDOM_DAEMON.
+
+ * Makefile.am (libcipher_la_SOURCES): Build random-daemon support
+ only if requested.
+
+2007-02-21 Werner Koch <wk@g10code.com>
+
+ * random.c (rndpool, keypool): Make unsigned.
+ (mix_pool): Change char* variables to unsigned char*.
+ (gcry_randomize): Make arg BUFFER a void*.
+ (gcry_create_nonce): Ditto.
+
+ * rmd160.c (gcry_rmd160_mixblock): Make BUFFER a void*.
+ (_gcry_rmd160_hash_buffer): Make OUTBUF and BUFFER void*.
+ * sha1.c (_gcry_sha1_hash_buffer): Ditto.
+
+ * cipher.c (gcry_cipher_encrypt, gcry_cipher_decrypt): Change
+ buffer args to void*.
+ (gcry_cipher_register): Make ALGORITHM_ID a int *.
+
+ * md.c (md_start_debug): Make SUFFIX a const char*. Use snprintf.
+ (gcry_md_debug): New.
+ (gcry_md_ctl): Changed arg BUFFER from unsigned char*.
+
+ * md.c (md_write): Make INBUF a const void*.
+ (gcry_md_write): Remove needless cast.
+ * crc.c (crc32_write): Make INBUF a const void*
+ (update_crc32, crc24rfc2440_write): Ditto.
+ * sha512.c (sha512_write, transform): Ditto.
+ * sha256.c (sha256_write, transform): Ditto.
+ * rmd160.c (rmd160_write, transform): Ditto.
+ * md5.c (md5_write, transform): Ditto.
+ * md4.c (md4_write, transform): Ditto.
+ * sha1.c (sha1_write, transform): Ditto.
+
+ * tiger.c (tiger_write, transform): Ditto.
+ * whirlpool.c (whirlpool_write, whirlpool_add, transform): Ditto.
+
+ * elgamal.c (elg_names): Change to a const*.
+ * dsa.c (dsa_names): Ditto.
+ * rsa.c (rsa_names): Ditto.
+ * pubkey.c (gcry_pk_lookup_func_name): Make ALIASES a const.
+
+2007-02-20 Werner Koch <wk@g10code.com>
+
+ * rndlinux.c (open_device): Remove unused arg MINOR.
+
+2007-01-30 Werner Koch <wk@g10code.com>
+
+ * sha256.c (oid_spec_sha256): Add alias from pkcs#1.
+ * sha512.c (oid_spec_sha512): Ditto.
+ (oid_spec_sha384): Ditto.
+
+2006-12-18 Werner Koch <wk@g10code.com>
+
+ * rndlinux.c (set_cloexec_flag): New.
+ (open_device): Set close-on-exec flags. Suggested by Max
+ Kellermann. Fixes Debian#403613.
+
+ * Makefile.am (AM_CPPFLAGS, AM_CFLAGS): Split and merged
+ Moritz' changes.
+ (INCLUDES): Removed.
+
+2006-11-30 Werner Koch <wk@g10code.com>
+
+ * serpent.c (byte_swap_32): Remove trailing semicolon.
+
+2006-11-15 Werner Koch <wk@g10code.com>
+
+ * Makefile.am (INCLUDES): Include ../src/
+
+2006-11-03 Werner Koch <wk@g10code.com>
+
+ * random.c [HAVE_GETTIMEOFDAY]: Included sys/time.h and not
+ sys/times.h. Reported by Rafaël Carré.
+
+2006-11-05 Moritz Schulte <moritz@g10code.com>
+
+ * Makefile.am (AM_CFLAGS): Added -I$(top_builddir)/src so that the
+ new gcrypt.h is used, not the one installed in the system.
+
+2006-10-25 Werner Koch <wk@g10code.com>
+
+ * primegen.c (prime_generate_internal): Tweaked use of secure
+ memory and entropy use. Save unused primes from the pool. Allocate
+ at least a pool of 30.
+ (save_pool_prime, get_pool_prime): New.
+
+2006-10-23 Werner Koch <wk@g10code.com>
+
+ * ac.c (_gcry_ac_data_from_sexp): Reset sexp_tmp for failsafe
+ means. Release sexp_cur if needed. Reported by Dirk Stoecker.
+
+ * pubkey.c (pubkeys_registered_lock): Initialized it. It is not
+ really needed because this is a mere initialization to 0 anyway.
+ Noted by Victor Stinner.
+
+2006-10-17 Werner Koch <wk@g10code.com>
+
+ * dsa.c (_gcry_dsa_generate2): New.
+ (generate): New arg QBITS. Add sanity checks for reasonable qbits
+ and nbits.
+ * pubkey.c (gcry_pk_genkey): Parse a qbits element.
+ (pubkey_generate): New arg QBITS. Pass it to the DSA generation.
+
+2006-10-05 Werner Koch <wk@g10code.com>
+
+ * md.c (gcry_md_algo_info) <get_asnoid>: Check that the algo is
+ available.
+
+2006-10-04 David Shaw <dshaw@jabberwocky.com> (wk)
+
+ * tiger.c (round): Rename to tiger_round as gcc 4 has a built-in
+ round function that this conflicts with.
+
+2006-09-11 Werner Koch <wk@g10code.com>
+
+ * rndw32.c (slow_gatherer_windowsNT): While adding data use the
+ size of the diskPerformance and not its address. Has been fixed in
+ GnuPG more than a year ago. Noted by Lee Fisher.
+
+2006-08-30 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (sexp_data_to_mpi): Need to allow "ripemd160" here as
+ this is the canonical name.
+
+2006-08-29 Hye-Shik Chang <perky@FreeBSD.org> (wk)
+
+ * seed.c: New.
+
+2006-08-03 Werner Koch <wk@g10code.com>
+
+ * random-daemon.c (_gcry_daemon_initialize_basics): Don't
+ initialize the socket. Remove arg SOCKETNAME.
+ (connect_to_socket): Make sure that daemon is set to -1 on error.
+ (call_daemon): Initialize the socket on the first call.
+ (_gcry_daemon_randomize, _gcry_daemon_get_random_bytes)
+ (_gcry_daemon_create_nonce): New arg SOCKETNAME.
+ * random.c (initialize): Call new daemon initializator.
+ (get_random_bytes, gcry_randomize, gcry_create_nonce): Pass socket
+ name to daemon call and reset allow_daemon on failure.
+
+2006-07-26 Werner Koch <wk@g10code.com>
+
+ * rmd160.c (_gcry_rmd160_mixblock): Add cast to transform call.
+
+ * blowfish.c (selftest): Cast string to usnigned char*.
+
+ * primegen.c (prime_generate_internal): Cast unsigned/char*
+ mismatch in calling m_out_of_n.
+ (is_prime): Changed COUNT to unsigned int *.
+
+ * ac.c (_gcry_ac_data_copy): Initialize DATA_MPIS.
+
+ * random.c (gcry_create_nonce): Update the pid after a fork.
+ Reported by Uoti Urpala.
+
+2006-07-04 Marcus Brinkmann <marcus@g10code.de>
+
+ * sha512.c: Fix typo in copyright notice.
+
+2006-06-21 Werner Koch <wk@g10code.com>
+
+ * rsa.c (_gcry_rsa_generate): Replace xcalloc by calloc.
+ * pubkey.c (gcry_pk_encrypt, gcry_pk_sign): Ditto.
+ (sexp_to_key, sexp_to_sig, sexp_to_enc, gcry_pk_encrypt)
+ (gcry_pk_sign, gcry_pk_genkey, gcry_pk_get_keygrip): Ditto.
+ * md.c (md_copy): Ditto.
+
+2006-04-22 Moritz Schulte <moritz@g10code.com>
+
+ * random-daemon.c (_gcry_daemon_initialize_basics): New argument:
+ SOCKETNAME. Passing on to connect_to_socket() if non-NULL.
+ (connect_to_socket, writen, readn, call_daemon): New functions.
+ (_gcry_daemon_randomize, _gcry_daemon_get_random_bytes)
+ (_gcry_daemon_create_nonce): Call call_daemon().
+ (RANDOM_DAEMON_SOCKET): New symbol.
+ (daemon_socket): New static variable.
+
+ * random.h (_gcry_daemon_initialize_basics): New parameter:
+ SOCKETNAME.
+ (_gcry_set_random_daemon_socket): New declaration.
+
+ * random.c (initialize_basics): Pass DAEMON_SOCKET_NAME to
+ _gcry_daemon_initialize_basics.
+ (_gcry_set_random_daemon_socket): New function, setting
+ DAEMON_SOCKET_NAME.
+
+2006-04-01 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c (eme_pkcs_v1_5_encode): Use KEY_SIZE directly, no need to
+ call gcry_ac_key_get_nbits.
+ (eme_pkcs_v1_5_decode): Likewise.
+ (ac_es_dencode_prepare_pkcs_v1_5): Fill options_em structure with
+ key_size.
+ (_gcry_ac_data_dump, gcry_ac_data_dump): New functions.
+ (_gcry_ac_data_to_sexp, _gcry_ac_data_from_sexp): More or less
+ rewritten; changed S-Expression format so that it matches the one
+ used in pubkey.c.
+
+2006-03-15 Werner Koch <wk@g10code.com>
+
+ * random-daemon.c: New.
+ * random.c (_gcry_use_random_daemon): New.
+ (get_random_bytes, gcry_randomize, gcry_create_nonce): Try
+ diverting to the daemon functions.
+
+2006-03-14 Werner Koch <wk@g10code.com>
+
+ * random.c (lock_seed_file): New.
+ (read_seed_file, _gcry_update_random_seed_file): Use it.
+
+ * random.c (gcry_create_nonce): Detect a fork and re-seed.
+ (read_pool): Fixed the fork detection; it used to work only for
+ multi-threaded processes.
+
+2006-03-12 Brad Hards <bradh@frogmouth.net> (wk)
+
+ * md.c (md_open): Use new variable macpads_Bsize instead of
+ hardwiring the block size. Changed at all places.
+
+2006-03-10 Brad Hards <bradh@frogmouth.net> (wk, patch 2005-04-22)
+
+ * md.c, sha256.c: Add support for SHA-224.
+ (sha224_init): New.
+
+2006-01-18 Brad Hards <bradh@frogmouth.net> (wk 2006-03-07)
+
+ * cipher.c (cipher_encrypt, cipher_decrypt, do_ofb_encrypt)
+ (do_ofb_decrypt, gcry_cipher_open): Implement Output Feedback Mode.
+
+2005-11-02 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_algo_name): Return "?" instead of NULL for
+ unknown algorithm IDs.
+ * cipher.c (cipher_algo_to_string): Likewise.
+
+2005-11-01 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_algo_info): Don't forget to break after switch
+ case.
+
+2005-09-19 Werner Koch <wk@g10code.com>
+
+ * dsa.c (generate): Add preliminary support for 2 and 4 keys.
+ Return an error code if the key size is not supported.
+ (_gcry_dsa_generate): Return an error.
+
+2005-08-22 Werner Koch <wk@g10code.com>
+
+ * primegen.c (check_prime): New arg RM_ROUNDS.
+ (prime_generate_internal): Call it here with 5 rounds as used
+ before.
+ (gcry_prime_check): But here with 64 rounds.
+ (is_prime): Make sure never to use less than 5 rounds.
+
+2005-04-16 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c (_gcry_ac_init): New function.
+
+2005-04-12 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c (_gcry_ac_io_write, _gcry_ac_io_read): Initialize err to
+ make the compiler happy.
+ Always use errno, now that gcry_malloc() is guaranteed to set
+ errno on failure.
+ (_gcry_ac_data_to_sexp): Don't forget to goto out after error in
+ loop.
+ (_gcry_ac_data_to_sexp): Remove unused variable: mpi_list;
+ (_gcry_ac_data_to_sexp): Always deallocate sexp_buffer.
+ (_gcry_ac_data_from_sexp): Don't forget to initialize data_set_new.
+ (_gcry_ac_data_from_sexp): Handle special case, which is
+ necessary, since gcry_sexp_nth() does not distinguish between
+ "element does not exist" and "element is the empty list".
+ (_gcry_ac_io_init_va): Use assert to make sure that mode and type
+ are correct.
+ Use gcry_error_t types where gcry_err_code_t types have been used
+ before.
+
+2005-04-11 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c (_gcry_ac_data_sign_scheme): Don't forget to initialize
+ buffer.
+
+ * whirlpool.c: New file.
+ * md.c (digest_table): Add whirlpool.
+ * Makefile.am (EXTRA_libcipher_la_SOURCES): Added: whirlpool.c.
+
+2005-03-30 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c (_gcry_ac_data_from_sexp): Use length of SEXP_CUR, not
+ length of SEXP; do not forget to set SEXP_TMP to NULL after it has
+ been released.
+
+ (struct gcry_ac_mpi): New member: name_provided.
+ (_gcry_ac_data_set): Rename variable `name_final' to `name_cp';
+ remove const qualifier; change code to not cast away const
+ qualifiers; use name_provided member as well.
+ (_gcry_ac_data_set, _gcry_ac_data_get_name): Use name_provided
+ member of named mpi structure.
+
+ (gcry_ac_name_to_id): Do not forget to initialize err.
+ (_gcry_ac_data_get_index): Do not forget to initialize mpi_return;
+ use gcry_free() instead of free(); remove unnecessary cast; rename
+ mpi_return and name_return to mpi_cp and name_cp; adjust code.
+ (ac_data_mpi_copy): Do not cast away const qualifier.
+ (ac_data_values_destroy): Likewise.
+ (ac_data_construct): Likewise.
+
+ (ac_data_mpi_copy): Initialize flags to GCRY_AC_FLAG_DEALLOC.
+ (ac_data_extract): Use GCRY_AC_FLAG_DEALLOC instead of
+ GCRY_AC_FLAG_COPY.
+
+ (_gcry_ac_io_init_va, _gcry_ac_io_init, gcry_ac_io_init)
+ (gcry_ac_io_init_va, _gcry_ac_io_write, _gcry_ac_io_read)
+ (_gcry_ac_io_read_all, _gcry_ac_io_process): New functions.
+ (gcry_ac_em_dencode_t): Use gcry_ac_io_t in prototype instead of
+ memory strings directly; adjust encode/decode functions to use io
+ objects.
+ (emsa_pkcs_v1_5_encode_data_cb): New function ...
+ (emsa_pkcs_v1_5_encode): ... use it here.
+ (ac_data_dencode): Use io objects.
+ (_gcry_ac_data_encode, _gcry_ac_data_decode, gcry_ac_data_encode)
+ (gcry_ac_data_decode): Likewise.
+ (_gcry_ac_data_encrypt_scheme, gcry_ac_data_encrypt_scheme)
+ (_gcry_ac_data_decrypt_scheme, gcry_ac_data_decrypt_scheme)
+ (_gcry_ac_data_sign_scheme, gcry_ac_data_sign_scheme)
+ (_gcry_ac_data_verify_scheme, gcry_ac_data_verify_scheme):
+ Likewise.
+
+2005-03-23 Werner Koch <wk@g10code.com>
+
+ * rndw32.c (_gcry_rndw32_gather_random_fast): While adding data
+ use the size of the object and not that of its address. Bug
+ reported by Sascha Kiefer.
+
+2005-03-19 Moritz Schulte <moritz@g10code.com>
+
+ * cipher.c (do_cbc_encrypt): Be careful not to overwrite data
+ which is to be used later on. This happened in case CTS is
+ enabled and OUTBUF is equal to INBUF.
+
+2005-02-25 Werner Koch <wk@g10code.com>
+
+ * pubkey.c (gcry_pk_get_keygrip): Allow for shadowed-private-key.
+
+2005-02-13 Moritz Schulte <moritz@g10code.com>
+
+ * serpent.c: Updated from 1.2 branch:
+
+ s/u32_t/u32/ and s/byte_t/byte/. To match what we have always
+ used and are using in all other files too.
+ (serpent_test): Moved prototype out of a function.
+
+2005-02-07 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c: Major parts rewritten.
+ * pubkey.c (_gcry_pk_get_elements): New function.
+
+2004-12-09 Werner Koch <wk@g10code.com>
+
+ * serpent.c (serpent_setkey): Moved prototype of serpent_test to
+ outer scope.
+
+2004-09-11 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (pubkey_table): Added an alias entry for GCRY_PK_ELG_E.
+
+2004-08-23 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c: Do not include <assert.h>.
+ * rndegd.c: Likewise.
+ * sha1.c: Likewise.
+ * rndunix.c: Likewise.
+ * rndlinux.c: Likewise.
+ * rmd160.c: Likewise.
+ * md5.c: Likewise.
+ * md4.c: Likewise.
+ * cipher.c: Likewise.
+ * crc.c: Likewise.
+ * blowfish.c: Likewise.
+
+ * pubkey.c (dummy_generate, dummy_check_secret_key)
+ (dummy_encrypt, dummy_decrypt, dummy_sign, dummy_verify): Return
+ err code GPG_ERR_NOT_IMPLEMENTED instead of aborting through
+ log_bug().
+ (dummy_get_nbits): Return 0 instead of aborting through log_bug().
+
+2004-08-19 Werner Koch <wk@g10code.de>
+
+ * pubkey.c (sexp_data_to_mpi): Changed the zero random byte
+ substituting code to actually do clever things. Thanks to
+ Matthias Urlichs for noting the implementation problem.
+
+2004-08-09 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_sign): Fixed memory leak; fix provided by
+ Modestas Vainius.
+
+2004-07-16 Werner Koch <wk@gnupg.org>
+
+ * rijndael.c (do_encrypt): Fix alignment problem. Bugs found by
+ Matthias Urlichs.
+ (do_decrypt): Ditto.
+ (keySched, keySched2): Use 2 macros along with unions in the key
+ schedule context.
+
+2004-07-14 Moritz Schulte <moritz@g10code.com>
+
+ * rsa.c (_gcry_rsa_decrypt): Don't forget to free "a". Thanks to
+ Nikos Mavroyanopoulos.
+
+2004-05-09 Werner Koch <wk@gnupg.org>
+
+ * random.c (read_pool): Mix the PID in to better protect after a
+ fork.
+
+2004-07-04 Moritz Schulte <moritz@g10code.com>
+
+ * serpent.c: Use "u32_t" instead of "unsigned long", do not
+ declare S-Box variables as "register". Fixes failure on
+ OpenBSD/sparc64, reported by Nikolay Sturm.
+
+2004-05-07 Werner Koch <wk@gnupg.org>
+
+ * random.c (initialize): Factored out some code to ..
+ (initialize_basics): .. new function.
+ (_gcry_random_initialize): Just call initialize_basics unless the
+ new arg FULL is set to TRUE.
+ (_gcry_fast_random_poll): Don't do anything unless the random
+ system has been really initialized.
+
+2004-05-07 Moritz Schulte <moritz@g10code.de>
+
+ * ac.c (gcry_ac_open): Do not dereference NULL pointer. Reported
+ by Umberto Salsi.
+
+2004-02-20 Werner Koch <wk@gnupg.org>
+
+ * primegen.c (check_prime): New args CB_FUNC and CB_ARG; call them
+ at different stages. Pass these arguments through all callers.
+
+2004-02-06 Werner Koch <wk@gnupg.org>
+
+ * des.c: Add a new OID as used by pkcs#12.
+
+ * rfc2268.c: New. Taken from libgcrypt.
+ * cipher.c: Setup the rfc2268 algorithm.
+
+2004-01-25 Moritz Schulte <mo@g10code.com>
+
+ * primegen.c (prime_generate_internal): Do not forget to free
+ `q_factor'; fixed by Brieuc Jeunhomme.
+ (prime_generate_internal): Do not forget to free `prime'.
+
+2004-01-14 Moritz Schulte <mo@g10code.com>
+
+ * ac.c (gcry_ac_data_set): New argument: flags; slightly
+ rewritten.
+ (gcry_ac_data_get_name, gcry_ac_data_get_index): Likewise.
+ (gcry_ac_key_pair_generate): New argument: misc_data; modified
+ order of arguments.
+ (gcry_ac_key_test): New argument: handle.
+ (gcry_ac_key_get_nbits, gcry_ac_key_get_grip): Likewise.
+ Use GCRY_AC_FLAG_NO_BLINDING instead of
+ GCRY_AC_DATA_FLAG_NO_BLINDING.
+ (gcry_ac_mpi): New member: flags.
+ (gcry_ac_data_search, gcry_ac_data_add): Removed functions.
+
+2003-12-22 Werner Koch <wk@gnupg.org>
+
+ * primegen.c (is_prime): Release A2.
+
+2003-12-19 Werner Koch <wk@gnupg.org>
+
+ * md.c: Moved a couple of functions down below the data structure
+ definitions.
+ (struct gcry_md_context): New field ACTUAL_HANDLE_SIZE.
+ (md_open): Set it here.
+ (struct gcry_md_list): New field ACTUAL_STRUCT_SIZE.
+ (md_enable): Set it here.
+ (md_close): Wipe the context memory.
+ * cipher.c (struct gcry_cipher_handle): New field ACTUAL_HANDLE_SIZE.
+ (gcry_cipher_open): Set it here.
+ (gcry_cipher_close): Use it to always wipe out the handle data.
+
+ * ac.c (gcry_ac_open): Make sure HANDLE gets initialized even when
+ the function is not successful.
+ (gcry_ac_close): Allow a NULL handle.
+ (gcry_ac_key_destroy, gcry_ac_key_pair_destroy): Ditto.
+ (gcry_ac_key_get_grip): Return INV_OBJ on error.
+
+ * primegen.c (prime_generate_internal): Fixed error code for
+ failed malloc. Replaced the !err if chain by gotos.
+ (gcry_prime_group_generator): Remove the extra sanity check.
+
+ * md.c: Minor code and comment cleanups.
+
+2003-12-16 Werner Koch <wk@gnupg.org>
+
+ * primegen.c (gen_prime): Doc fix. Thanks to Newton Hammet.
+
+2003-12-11 Werner Koch <wk@gnupg.org>
+
+ * rndunix.c (slow_poll): Don't use #warning but #error.
+
+ * rndegd.c: Changed indentation.
+ (my_make_filename): Removed the var_arg cruft because we
+ don't need it here. Changed caller.
+
+ * rndlinux.c: Changed indentation.
+ (open_device): Remove the superfluous stat call and clarify
+ comment.
+
+ * rsa.c: Changed indentation.
+ (secret): Use the standard algorithm if p, q and u are not
+ available.
+ (rsa_blind, rsa_unblind): Renamed from _gcry_rsa_blind,
+ _gcry_rsa_unblind and moved more to the top.
+
+ * md4.c: Changed indentation. Removed unnecessary casts.
+ * md5.c, rmd160.c, sha1.c, tiger.c: Ditto.
+ * rijndael.c, twofish.c: Ditto.
+ * serpent.c: Removed unnecessary casts.
+ * sha256.c, sha512.c: Ditto.
+
+2003-12-09 Werner Koch <wk@gnupg.org>
+
+ * dsa.c: Unified indentation style.
+ * elgamal.c: Ditto.
+ * des.c (des_key_schedule): Code beautifications.
+ * blowfish.c: Changed indentation style.
+ * cast5.c (do_cast_setkey): Ditto.
+
+ * pubkey.c (gcry_pk_encrypt): Replaced the chain of if(!err) tests
+ by straightforward gotos. Other cleanups.
+ (gcry_pk_decrypt): Ditto.
+ (gcry_pk_sign): Ditto.
+ (gcry_pk_verify): Ditto.
+ (gcry_pk_genkey): Ditto. Use strtoul instead of strtol.
+ (gcry_pk_ctl): Use GPG_ERR_INV_ARG to indicate bad arguments.
+
+2003-12-07 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (gcry_pk_register_default): Undef the helper macro.
+ (gcry_pk_map_name): Allow NULL for string.
+ (sexp_to_key): Use memcpy and not strncpy. Use gcry_free and not
+ free.
+ (sexp_to_sig): Ditto.
+ (sexp_to_enc): Ditto. Replaced the chain of if(!err) tests by
+ straightforward gotos.
+
+2003-12-05 Werner Koch <wk@gnupg.org>
+
+ * cipher.c: Documentation cleanups.
+ (gcry_cipher_mode_from_oid): Allow NULL for STRING.
+
+2003-12-03 Werner Koch <wk@gnupg.org>
+
+ * elgamal.c (sign, do_encrypt, gen_k): Make sure that a small K is
+ only used for encryption.
+
+2003-11-18 Werner Koch <wk@gnupg.org>
+
+ * random.h (rndw32_set_dll_name): Removed unused prototype.
+
+ * Makefile.am (EXTRA_DIST): Added Manifest.
+
+2003-11-11 Werner Koch <wk@gnupg.org>
+
+ * Manifest: New.
+
+2003-11-04 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_hash_buffer): Use shortcut for SHA1.
+ * sha1.c (_gcry_sha1_hash_buffer): New.
+
+ * random.c: Reformatted most functions.
+ (mix_pool): Moved the failsafe_digest from global
+ scope to here.
+ (do_fast_random_poll): Use the generic functions even if a fast
+ gathering function has been used.
+ (read_pool): Detect a fork and retry.
+ (gcry_randomize, get_random_bytes): Don't distinguish anymore
+ between weak and strong random.
+ (gcry_create_nonce): New.
+
+2003-10-31 Werner Koch <wk@gnupg.org>
+
+ * rndw32.c (slow_gatherer_windowsNT): Use a plain buffer for the
+ disk performance values and not the W32 API structure.
+
+ * dsa.c (verify): s/exp/ex/ due to shadowing of a builtin.
+ * elgamal.c (verify): Ditto.
+
+ * ac.c (gcry_ac_data_get_index): s/index/idx/
+ (gcry_ac_data_copy_internal): Remove the cast in _gcry_malloc.
+ (gcry_ac_data_add): Must use gcry_realloc instead of realloc.
+ * pubkey.c (sexp_elements_extract): s/index/idx/ as tribute to the
+ forehackers.
+ (gcry_pk_encrypt): Removed shadowed definition of I. Reordered
+ arguments to malloc for clarity.
+ (gcry_pk_sign, gcry_pk_genkey): Ditto.
+ * primegen.c (prime_generate_internal): s/random/randomlevel/.
+
+2003-10-27 Moritz Schulte <mo@g10code.com>
+
+ * pubkey.c (gcry_pk_encrypt): Don't forget to deallocate pkey.
+
+2003-10-27 Werner Koch <wk@gnupg.org>
+
+ * random.c (gcry_random_add_bytes): Return if buflen is zero to
+ avoid a gcc warning about an unused parameter.
+ (MASK_LEVEL): Simplified; now works for signed and unsigned
+ w/o warnings.
+
+ * md.c (md_start_debug): Removed the const from SUFFIX, because
+ this function is called from the control function which does not
+ require const.
+
+ Prefixed all {pubkey,digest,cipher}_spec_* global variables with
+ _gcry_.
+
+ * ac.c (ac_key_identifiers): Made static.
+
+ * random.c (getfnc_gather_random,getfnc_fast_random_poll): Move
+ prototypes to ..
+ * rand-internal.h: .. here
+ * random.c (getfnc_gather_random): Include rndw32 gatherer.
+ * rndunix.c, rndw32.c, rndegd.c: Include them here.
+ * rndlinux.c (_gcry_rndlinux_gather_random): Prepend the _gcry_
+ prefix. Changed all callers.
+ * rndegd.c (_gcry_rndegd_gather_random): Likewise.
+ (_gcry_rndegd_connect_socket): Likewise.
+ * rndunix.c (_gcry_rndunix_gather_random): Likewise.
+ (waitpid): Made static.
+ * rndw32.c: Removed the old and unused winseed.dll cruft.
+ (_gcry_rndw32_gather_random_fast): Renamed from
+ gather_random_fast.
+ (_gcry_rndw32_gather_random): Renamed from gather_random. Note,
+ that the changes 2003-04-08 somehow got lost.
+
+ * sha512.c (sha512_init, sha384_init): Made static.
+
+ * cipher.c (do_ctr_decrypt): Removed "return" from this void
+ function.
+
+2003-10-24 Moritz Schulte <mo@g10code.com>
+
+ * serpent.c: Fix an issue on big-endian systems.
+
+ * rndw32.c: Removed IS_MODULE cruft.
+ * rndlinux.c (rndlinux_gather_random): Likewise.
+
+2003-10-10 Werner Koch <wk@gnupg.org>
+
+ * primegen.c (gen_prime): Bail out if NBITS is less than 16.
+ (prime_generate_internal): Initialize prime variable to suppress
+ compiler warning. Check pbits, initialize qbits when passed as
+ zero.
+
+ * primegen.c (prime_generate_internal): New arg
+ ALL_FACTORS. Changed all callers.
+ (gcry_prime_generate): Make the factors arg optional. Request
+ all_factors. Make sure PRIME is set to NULL even on error.
+ (gcry_prime_group_generator): New.
+ (gcry_prime_release_factors): New.
+
+2003-10-06 Werner Koch <wk@gnupg.org>
+
+ * primegen.c (gen_prime): Assert that NBITS is never zero; it
+ would cause a segv.
+
+2003-09-28 Moritz Schulte <mo@g10code.com>
+
+ * ac.c: Include "cipher.h".
+
+2003-09-27 Moritz Schulte <mo@g10code.com>
+
+ * rndegd.c (do_read): Return nread instead of nbytes; thanks to
+ Michael Caerwyn.
+
+2003-09-04 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (_gcry_pk_aliased_algo_name): New.
+ * ac.c (gcry_ac_open): Use it here.
+
+ * Makefile.am (EXTRA_libcipher_la_SOURCES): Add serpent.c
+
+2003-09-02 Moritz Schulte <mo@g10code.com>
+
+ * primegen.c (gcry_prime_check, gcry_prime_generate): New
+ functions.
+ (prime_generate_internal): New function, based on
+ _gcry_generate_elg_prime.
+ (_gcry_generate_elg_prime): Rewritten as a wrapper for
+ prime_generate_internal.
+
+2003-08-28 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (gcry_pk_encrypt): Don't include the flags list in the
+ return value. This does not make sense and breaks any programs
+ parsing the output strictly (e.g. current gpgsm).
+ (gcry_pk_encrypt): If aliases for the algorithm name exists, take
+ the first one instead of the regular name to adhere to SPKI
+ conventions.
+ (gcry_pk_genkey): Ditto.
+ (gcry_pk_sign): Ditto. Removed unused KEY_ALGO_NAME.
+
+2003-08-19 Moritz Schulte <mo@g10code.com>
+
+ * cipher.c: Add support for Serpent
+ * serpent.c: New file.
+
+2003-08-10 Moritz Schulte <moritz@g10code.com>
+
+ * rsa.c (_gcry_rsa_blind, _gcry_rsa_unblind): Declare static.
+
+2003-08-09 Timo Schulz <twoaday@freakmail.de>
+
+ * random.c (getfnc_gather_random): Don't check NAME_OF_DEV_RANDOM
+ twice; also check the NAME_OF_DEV_URANDOM device.
+
+2003-08-08 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (sexp_to_enc): Fixed extraction of S-Expression: do not
+ fail if no `flags' sub S-Expression is found.
+
+2003-07-27 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_lookup_func_oid): Allow for empty OID lists.
+
+2003-07-23 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c (gcry_ac_data_construct): New argument: include_flags; only
+ include the `flags' S-expression if include_flags is true. Adjust
+ callers. Thanks to Ralf Schneider for triggering a bug caused by a
+ `flags' sub-S-expression where it is not expected.
+
+2003-07-21 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_lookup_func_name): Use new member name
+ `aliases' instead of `sexp_names'.
+
+ * ac.c (gcry_ac_key_data_get): New function.
+
+ * cipher.c (gcry_cipher_lookup_func_name): Fix return value.
+
+2003-07-20 Moritz Schulte <moritz@g10code.com>
+
+ * blowfish.c: Adjusted for new gcry_cipher_spec_t structure.
+ * cast5.c: Likewise.
+ * twofish.c: Likewise.
+ * arcfour.c: Likewise.
+ * rijndael.c (rijndael_oids, rijndael192_oids, rijndael256_oids):
+ New variables, adjust for new gcry_cipher_spec_t structure.
+ * des.c (oids_tripledes): New variable, adjust for new
+ gcry_cipher_spec_t structure.
+
+ * md.c (oid_table): Removed.
+
+ * tiger.c (oid_spec_tiger): New variable.
+ (digest_spec_tiger): Adjusted for new gcry_md_spec_t structure.
+
+ * sha512.c (oid_spec_sha512): New variable.
+ (digest_spec_sha512): Adjusted for new gcry_md_spec_t structure.
+
+ * sha512.c (oid_spec_sha384): New variable.
+ (digest_spec_sha384): Adjusted for new gcry_md_spec_t structure.
+
+ * sha256.c (oid_spec_sha256): New variable.
+ (digest_spec_sha256): Adjusted for new gcry_md_spec_t structure.
+
+ * sha1.c (oid_spec_sha1): New variable.
+ (digest_spec_sha1): Adjusted for new gcry_md_spec_t structure.
+
+ * rmd160.c (oid_spec_rmd160): New variable.
+ (digest_spec_rmd160): Adjusted for new gcry_md_spec_t structure.
+
+ * md5.c (oid_spec_md5): New variable.
+ (digest_spec_md5): Adjusted for new gcry_md_spec_t structure.
+
+ * md4.c (oid_spec_md4): New variable.
+ (digest_spec_md4): Adjusted for new gcry_md_spec_t structure.
+
+ * crc.c (digest_spec_crc32, digest_spec_crc32_rfc1510,
+ digest_spec_crc32_rfc2440): Adjusted for new gcry_md_spec_t
+ structure.
+
+2003-07-19 Moritz Schulte <moritz@g10code.com>
+
+ * md.c (gcry_md_lookup_func_oid): New function.
+ (search_oid): New function, copied from cipher.c.
+ (gcry_md_map_name): Adjust for new search_oid interface.
+
+ * cipher.c (oid_table): Removed table.
+ (gcry_cipher_lookup_func_oid): New function.
+ (search_oid): Rewritten to use the module functions.
+ (gcry_cipher_map_name): Adjust for new search_oid interface.
+ (gcry_cipher_mode_from_oid): Likewise.
+
+2003-07-18 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_hash_buffer): Convert ERR to gpg_error_t in
+ gpg_strerror.
+
+2003-07-14 Moritz Schulte <moritz@g10code.com>
+
+ * cipher.c (gcry_cipher_lookup_func_name): Also check the cipher
+ name aliases, not just the primary name.
+ (gcry_cipher_map_name): Remove kludge for aliasing Rijndael to
+ AES.
+
+ * arcfour.c, blowfish.c, cast5.c, des.c, twofish.c: Adjust cipher
+ specification structures.
+
+ * rijndael.c (rijndael_names, rijndael192_names,
+ rijndael256_names): New variables, use them in the cipher
+ specifications.
+
+ * rmd160test.c: Removed file.
+
+ * ac.c, arcfour.c, blowfish.c, cast5.c, cipher.c, des.c, dsa.c,
+ elgamal.c, md.c, pubkey.c, random.c, rijndael.c, rsa.c, twofish.c:
+ Used gcry_err* wrappers for libgpg symbols.
+
+ * primegen.c (gen_prime): Correct the order of arguments to
+ extra_check.
+
+2003-07-12 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c: Replaced all public occurrences of gpg_error_t with
+ gcry_error_t.
+ * cipher.c: Likewise.
+ * md.c: Likewise.
+ * pubkey.c: Likewise.
+ * random.c: Likewise.
+
+ * cipher.c: Added support for TWOFISH128.
+
+2003-07-08 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c (gcry_ac_data_copy_internal): New function, based on
+ gcry_ac_data_copy.
+ (gcry_ac_data_copy): Made public, use gcry_ac_data_copy_internal.
+ (gcry_ac_key_init): Use gcry_ac_data_copy_internal.
+
+2003-07-07 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c (gcry_ac_data_set): Only release old MPI value if it is
+ different from the new value. Bug reported by Simon Josefsson
+ <jas@extundo.com>.
+
+ * pubkey.c (gcry_pk_list): New function.
+ * md.c (gcry_md_list): New function.
+
+ * ac.c (gcry_ac_key_pair_generate): Fix calculation of format
+ string size.
+
+2003-07-05 Moritz Schulte <moritz@g10code.com>
+
+ * md.c: Named struct of digest_table `digest_table_entry'.
+ (digest_table_entry): New member: algorithm; filled in.
+ (digest_table_entry): Removed unused member: flags.
+ (gcry_md_register): New argument: algorithm_id, filled in.
+ (gcry_md_register_default): Used algorithm ID from module
+ structure.
+ (gcry_md_map_name): Likewise.
+ (md_enable): Likewise.
+ (md_read): Likewise.
+ (gcry_md_info): Likewise.
+
+ * pubkey.c: Named struct for pubkey_table `pubkey_table_entry'.
+ (pubkey_table_entry): New member: algorithm; filled in.
+ (gcry_pk_register_default): Used algorithm ID from pubkey_table.
+ (gcry_pk_register): New argument: algorithm_id, filled in.
+ (gcry_pk_map_name): Used algorithm ID from module structure.
+ (gcry_pk_decrypt): Likewise.
+ (gcry_pk_encrypt): Likewise.
+ (gcry_pk_verify): Likewise.
+ (gcry_pk_sign): Likewise.
+ (gcry_pk_testkey): Likewise.
+ (gcry_pk_genkey): Likewise.
+ (gcry_pk_get_nbits): Likewise.
+ (sexp_to_key): Removed unused variable: algo.
+ (sexp_to_sig): Likewise.
+
+ * cipher.c: Named struct for cipher_table `cipher_table_entry'.
+ (cipher_table_entry): New member: algorithm; filled in.
+ (gcry_cipher_register_default): Used algorithm ID from
+ cipher_table.
+ (gcry_cipher_register): New argument: algorithm_id, filled in.
+ (gcry_cipher_map_name): Used algorithm ID from module structure.
+
+ * arcfour.c (cipher_spec_arcfour): Removed algorithm ID.
+ * blowfish.c (cipher_spec_blowfish): Likewise.
+ * cast5.c (cipher_spec_cast5): Likewise.
+ * crc.c (digest_spec_crc32): Likewise.
+ * crc.c (digest_spec_crc32_rfc1510): Likewise.
+ * crc.c (digest_spec_crc32_rfc2440): Likewise.
+ * des.c (cipher_spec_des): Likewise.
+ * des.c (cipher_spec_tripledes): Likewise.
+ * dsa.c (pubkey_spec_dsa): Likewise.
+ * elgamal.c (pubkey_spec_elg): Likewise.
+ * md4.c (digest_spec_md4): Likewise.
+ * md5.c (digest_spec_md5): Likewise.
+ * aes.c (cipher_spec_aes): Likewise.
+ * aes.c (cipher_spec_aes192): Likewise.
+ * aes.c (cipher_spec_aes256): Likewise.
+ * rsa.c (pubkey_spec_rsa): Likewise.
+ * sha1.c (digest_spec_sha1): Likewise.
+ * sha256.c (digest_spec_sha256): Likewise.
+ * sha512.c (digest_spec_sha512): Likewise.
+ * tiger.c (digest_spec_tiger): Likewise.
+ * twofish.c (cipher_spec_twofish): Likewise.
+ * twofish.c (cipher_spec_twofish128): Likewise.
+
+ * Makefile.am (EXTRA_libcipher_la_SOURCES): Fix list of source
+ files; reported by Simon Josefsson <jas@extundo.com>.
+
+ * pubkey.c: Replaced all occurrences of `id' with `algorithm',
+ since `id' is a keyword in obj-c.
+ * md.c: Likewise.
+ * cipher.c: Likewise.
+
+ * crc.c, md4.c, md5.c, rmd160.c, sha1.c, sha256.c, tiger.c:
+ Replaced all occurrences of gcry_digest_spec_t with gcry_md_spec_t.
+
+ * dsa.c, rsa.c, elgamal.c: Replaced all occurrences of
+ gcry_pubkey_spec_t with gcry_pk_spec_t.
+
+ * md.c: Replaced all occurrences of gcry_digest_spec_t with
+ gcry_md_spec_t.
+ (gcry_digest_register_default): Renamed to ...
+ (gcry_md_register_default): ... this; adjusted callers.
+ (gcry_digest_lookup_func_name): Renamed to ...
+ (gcry_md_lookup_func_name): ... this; adjusted callers.
+ (gcry_digest_lookup_name): Renamed to ...
+ (gcry_md_lookup_name): ... this; adjusted callers.
+ (gcry_digest_register): Renamed to ...
+ (gcry_md_register): ... this.
+ (gcry_digest_unregister): Renamed to ...
+ (gcry_md_unregister): ... this.
+
+ * pubkey.c (gcry_pubkey_register): Renamed to ...
+ (gcry_pk_register): ... this.
+ (gcry_pubkey_unregister): Renamed to ...
+ (gcry_pk_unregister): ... this.
+ Replaced all occurrences of gcry_pubkey_spec_t with gcry_pk_spec_t.
+ (gcry_pubkey_register_default): Renamed to ...
+ (gcry_pk_register_default): ... this; adjusted callers.
+ (gcry_pubkey_lookup_func_name): Renamed to ...
+ (gcry_pk_lookup_func_name): ... this; adjusted callers.
+ (gcry_pubkey_lookup_name): Renamed to ...
+ (gcry_pk_lookup_name): ... this; adjusted callers.
+
+ * md.c (gcry_md_hash_buffer): Fix error checking. Thanks to Simon
+ Josefsson <jas@extunde.com>.
+
+2003-07-04 Moritz Schulte <moritz@g10code.com>
+
+ * cipher.c (gcry_cipher_list): New function.
+
+2003-07-01 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (sexp_to_sig): Accept a `flags' S-expression to be more
+ consistent with sexp_to_enc.
+
+2003-06-30 Moritz Schulte <moritz@g10code.com>
+
+ * Makefile.am (libcipher_la_SOURCES): Added: ac.c.
+
+ * pubkey.c (_gcry_pk_module_lookup): New function.
+ (_gcry_pk_module_release): New function.
+
+2003-06-29 Moritz Schulte <moritz@g10code.com>
+
+ * ac.c: New file.
+
+2003-06-26 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_hash_buffer): Trigger BUG correctly with new API.
+
+2003-06-19 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_is_enabled): Fixed.
+
+2003-06-18 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (gcry_cipher_get_algo_keylen): New.
+ (gcry_cipher_get_algo_blklen): New.
+
+2003-06-18 Moritz Schulte <moritz@g10code.com>
+
+ * arcfour.c, cipher.c, blowfish.c, md.c, cast5.c, pubkey.c, crc.c,
+ des.c, dsa.c, elgamal.c, md4.c, md5.c, random.c, rijndael.c,
+ rmd160.c, rsa.c, sha1.c, sha256.c, sha512.c, tiger.c, twofish.c:
+ Replaced older types GcryDigestSpec, GcryCipherSpec and
+ GcryPubkeySpec with newer types: gcry_digest_spec_t,
+ gcry_cipher_spec_t and gcry_pubkey_spec_t.
+
+ * md.c (gcry_digest_id_new): Removed function.
+ (gcry_digest_register): Removed code for generating a new module
+ ID.
+
+ * pubkey.c (gcry_pubkey_id_new): Removed function.
+ (gcry_pubkey_register): Removed code for generating a new module
+ ID.
+
+ * cipher.c, md.c, pubkey.c: Replace old type GcryModule with newer
+ one: gcry_module_t.
+ (gcry_cipher_id_new): Removed function.
+ (gcry_cipher_register): Removed code for generating a new module
+ ID.
+
+ * cipher.c (gcry_cipher_register): Adjust call to
+ _gcry_module_add.
+ (gcry_cipher_register_default): Likewise.
+ * pubkey.c (gcry_pubkey_register_default): Likewise.
+ (gcry_pubkey_register): Likewise.
+ * md.c (gcry_digest_register_default): Likewise.
+ (gcry_digest_register): Likewise.
+
+ * md.c (gcry_digest_lookup_func_id): Removed function.
+ (gcry_digest_lookup_id): Likewise.
+ (gcry_digest_id_new): Use _gcry_module_lookup_id instead of
+ gcry_digest_lookup_id.
+ (digest_algo_to_string): Likewise.
+ (check_digest_algo): Likewise.
+ (md_enable): Likewise.
+ (md_digest_length): Likewise.
+ (md_asn_oid): Likewise.
+
+ * pubkey.c (gcry_pubkey_lookup_id): Removed function.
+ (gcry_pubkey_lookup_func_id): Likewise.
+ (gcry_pubkey_id_new): Use _gcry_module_lookup_id instead of
+ gcry_pubkey_lookup_id.
+ (gcry_pk_algo_name): Likewise.
+ (disable_pubkey_algo): Likewise.
+ (check_pubkey_algo): Likewise.
+ (pubkey_get_npkey): Likewise.
+ (pubkey_get_nskey): Likewise.
+ (pubkey_get_nsig): Likewise.
+ (pubkey_get_nenc): Likewise.
+ (pubkey_generate): Likewise.
+ (pubkey_check_secret_key): Likewise.
+ (pubkey_encrypt): Likewise.
+ (pubkey_decrypt): Likewise.
+ (pubkey_sign): Likewise.
+ (pubkey_verify): Likewise.
+ (gcry_pk_algo_info): Likewise.
+
+ * cipher.c (gcry_cipher_lookup_func_id): Removed function.
+ (gcry_cipher_lookup_id): Likewise.
+ (cipher_algo_to_string): Use _gcry_module_lookup_id instead of
+ gcry_cipher_lookup_id.
+ (disable_cipher_algo): Likewise.
+ (check_cipher_algo): Likewise.
+ (cipher_get_blocksize): Likewise.
+ (gcry_cipher_open): Likewise.
+ (gcry_cipher_id_new): Likewise.
+
+2003-06-17 Moritz Schulte <moritz@g10code.com>
+
+ * Makefile.am (GCRYPT_MODULES): Set to @GCRYPT_CIPHERS@,
+ @GCRYPT_PUBKEY_CIPHERS@, @GCRYPT_DIGESTS@ and @GCRYPT_RANDOM@.
+ (libcipher_la_DEPENDENCIES): Set to $(GCRYPT_MODULES).
+ (libcipher_la_LIBADD): Likewise.
+ (AM_CFLAGS): Added: @GPG_ERROR_CFLAGS@.
+ (EXTRA_libcipher_la_SOURCES): Added all conditional sources.
+
+ * md.c (md_open): Use _gcry_fast_random_poll instead of
+ fast_random_poll.
+ * cipher.c (gcry_cipher_open): Likewise.
+
+ * random.h (fast_random_poll): Removed macro.
+
+ * blowfish.c, md4.c, md5.c, rmd160.c, sha1.c, sha256.c, sha512.c,
+ tiger.c: Use Autoconf's WORDS_BIGENDIAN instead of our own
+ BIG_ENDIAN_HOST.
+
+2003-06-16 Moritz Schulte <moritz@g10code.com>
+
+ * random.c (getfnc_gather_random): Do not special-case
+ USE_ALL_RANDOM_MODULES; make it the default.
+
+ * dsa.c: Replace last occurrences of old type names with newer
+ names (i.e. replace MPI with gcry_mpi_t).
+ * elgamal.c: Likewise.
+ * primegen.c: Likewise.
+ * pubkey.c: Likewise.
+ * rsa.c: Likewise.
+
+2003-06-14 Moritz Schulte <moritz@g10code.com>
+
+ * des.c (des_setkey): Add selftest check.
+ (tripledes_set3keys): Likewise.
+ (do_tripledes_setkey): Remove selftest check.
+ (do_des_setkey): Likewise.
+
+2003-06-11 Moritz Schulte <moritz@g10code.com>
+
+ * md.c (_gcry_md_init): New function.
+ * cipher.c (_gcry_cipher_init): New function.
+ * pubkey.c (_gcry_pk_init): New function.
+
+2003-06-13 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_get_algo): Reverted to old API. This is a
+ convenience function anyway and error checking is not appropriate.
+ (gcry_md_is_secure): New.
+ (gcry_md_is_enabled): New.
+
+2003-06-12 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (gcry_cipher_open): Make sure HANDLE is set to NULL on
+ error.
+
+2003-06-11 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_open): Make sure H receives either NULL or a
+ valid handle.
+ (gcry_md_copy): Swapped arguments so that it is more in line with
+ md_open and most other API functions like memcpy (destination
+ comes first). Make sure HANDLE is set to NULL on error.
+
+ * rijndael.c (do_encrypt): Hack to force correct alignment. It
+ seems not to be sufficient, though. We should rework these
+ functions and remove all these ugly casts. Let the compiler
+ optimize or have an assembler implementation.
+
+2003-06-09 Moritz Schulte <moritz@g10code.com>
+
+ * Makefile.am: Removed rules for serpent, since that is not committed
+ yet.
+
+2003-06-08 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_encrypt): Improve calculation for size of the
+ format string.
+
+2003-06-07 Moritz Schulte <moritz@g10code.com>
+
+ * arcfour.c, bithelp.h, blowfish.c, cast5.c, cipher.c, crc.c,
+ des.c, dsa.c, elgamal.c, md4.c, md5.c, md.c, primegen.c, pubkey.c,
+ rand-internal.h, random.c, random.h, rijndael.c, rmd160.c,
+ rmd160test.c, rmd.h, rndegd.c, rndlinux.c, rndunix.c, rndw32.c,
+ rsa.c, sha1.c, sha256.c, sha512.c, tiger.c, twofish.c: Edited all
+ preprocessor instructions to remove whitespace before the '#'.
+ This is not required by C89, but there are some compilers out
+ there that don't like it. Replaced any occurrence of the now
+ deprecated type names with the new ones.
+
+2003-06-04 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_encrypt): Construct an arg_list and use
+ gcry_sexp_build_array instead of gcry_sexp_build.
+ (gcry_pk_sign): Likewise.
+ (gcry_pk_genkey): Likewise.
+
+2003-06-01 Moritz Schulte <moritz@g10code.com>
+
+ * dsa.c (_gcry_dsa_generate): Do not check whether the algorithm ID
+ does indeed belong to DSA.
+ (_gcry_dsa_sign): Likewise.
+ (_gcry_dsa_verify): Likewise.
+ (_gcry_dsa_get_nbits): Likewise.
+
+ * elgamal.c (_gcry_elg_check_secret_key): Do not check whether the
+ algorithm ID does indeed belong to ElGamal.
+ (_gcry_elg_encrypt): Likewise.
+ (_gcry_elg_decrypt): Likewise.
+ (_gcry_elg_sign): Likewise.
+ (_gcry_elg_verify): Likewise.
+ (_gcry_elg_get_nbits): Likewise.
+ (_gcry_elg_generate): Likewise.
+
+ * rsa.c (_gcry_rsa_generate): Do not check whether the algorithm ID
+ does indeed belong to RSA.
+ (_gcry_rsa_encrypt): Likewise.
+ (_gcry_rsa_decrypt): Likewise.
+ (_gcry_rsa_sign): Likewise.
+ (_gcry_rsa_verify): Likewise.
+ (_gcry_rsa_get_nbits): Likewise.
+
+2003-05-30 Moritz Schulte <moritz@g10code.com>
+
+ * md.c (md_get_algo): Return zero in case no algorithm is enabled.
+
+ * md.c (gcry_md_info): Adjusted for new no-errno-API.
+ (md_final): Likewise.
+ (gcry_md_get_algo): Likewise.
+ * pubkey.c (gcry_pk_get_keygrip): Likewise.
+ (gcry_pk_ctl): Likewise.
+ (gcry_pk_algo_info): Likewise.
+ * des.c (selftest): Likewise.
+
+2003-05-29 Moritz Schulte <moritz@g10code.com>
+
+ * md.c (md_enable): Do not forget to release module on error.
+ (gcry_md_open): Adjusted for new no-errno-API.
+ (md_open): Likewise.
+ (md_copy): Likewise.
+ (gcry_md_copy): Likewise.
+ (gcry_md_setkey): Likewise.
+ (gcry_md_algo_info): Likewise.
+
+ * cipher.c (gcry_cipher_open): Adjusted for new no-errno-API and
+ also fixed a locking bug.
+ (gcry_cipher_encrypt): Adjusted for new no-errno-API.
+ (gcry_cipher_decrypt): Likewise.
+ (gcry_cipher_ctl): Likewise.
+ (gcry_cipher_info): Likewise.
+ (gcry_cipher_algo_info): Likewise.
+
+2003-05-28 Moritz Schulte <moritz@g10code.com>
+
+ * md.c (md_enable): Adjusted for libgpg-error.
+ (gcry_md_enable): Likewise.
+ (gcry_digest_register_default): Likewise.
+ (gcry_digest_register): Likewise.
+ (check_digest_algo): Likewise.
+ (prepare_macpads): Likewise.
+ (gcry_md_setkey): Likewise.
+ (gcry_md_ctl): Likewise.
+ (gcry_md_get): Likewise.
+ (gcry_md_algo_info): Likewise.
+ (gcry_md_info): Likewise.
+ * dsa.c (_gcry_dsa_generate): Likewise.
+ (_gcry_dsa_check_secret_key): Likewise.
+ (_gcry_dsa_sign): Likewise.
+ (_gcry_dsa_verify): Likewise.
+ * twofish.c (do_twofish_setkey): Likewise.
+ (twofish_setkey): Likewise.
+ * cipher.c (gcry_cipher_register): Likewise.
+
+2003-05-25 Moritz Schulte <moritz@g10code.com>
+
+ * rijndael.c (do_setkey): Adjusted for libgpg-error.
+ (rijndael_setkey): Likewise.
+ * random.c (gcry_random_add_bytes): Likewise.
+ * elgamal.c (_gcry_elg_generate): Likewise.
+ (_gcry_elg_check_secret_key): Likewise.
+ (_gcry_elg_encrypt): Likewise.
+ (_gcry_elg_decrypt): Likewise.
+ (_gcry_elg_sign): Likewise.
+ (_gcry_elg_verify): Likewise.
+ * rsa.c (_gcry_rsa_generate): Likewise.
+ (_gcry_rsa_check_secret_key): Likewise.
+ (_gcry_rsa_encrypt): Likewise.
+ (_gcry_rsa_decrypt): Likewise.
+ (_gcry_rsa_sign): Likewise.
+ (_gcry_rsa_verify): Likewise.
+ * pubkey.c (dummy_generate, dummy_check_secret_key, dummy_encrypt,
+ dummy_decrypt, dummy_sign, dummy_verify): Likewise.
+ (gcry_pubkey_register): Likewise.
+ (check_pubkey_algo): Likewise.
+ (pubkey_generate): Likewise.
+ (pubkey_check_secret_key): Likewise.
+ (pubkey_encrypt): Likewise.
+ (pubkey_decrypt): Likewise.
+ (pubkey_sign): Likewise.
+ (pubkey_verify): Likewise.
+ (sexp_elements_extract): Likewise.
+ (sexp_to_key): Likewise.
+ (sexp_to_sig): Likewise.
+ (sexp_to_enc): Likewise.
+ (sexp_data_to_mpi): Likewise.
+ (gcry_pk_encrypt): Likewise.
+ (gcry_pk_decrypt): Likewise.
+ (gcry_pk_sign): Likewise.
+ (gcry_pk_verify): Likewise.
+ (gcry_pk_testkey): Likewise.
+ (gcry_pk_genkey): Likewise.
+ (gcry_pk_ctl): Likewise.
+ * cipher.c (dummy_setkey): Likewise.
+ (check_cipher_algo): Likewise.
+ (gcry_cipher_open): Likewise.
+ (cipher_setkey): Likewise.
+ (gcry_cipher_ctl): Likewise.
+ (cipher_encrypt): Likewise.
+ (gcry_cipher_encrypt): Likewise.
+ (cipher_decrypt): Likewise.
+ (gcry_cipher_decrypt): Likewise.
+ (gcry_cipher_info): Likewise.
+ (gcry_cipher_algo_info): Likewise.
+ * cast5.c (cast_setkey): Likewise.
+ (do_cast_setkey): Likewise.
+ * arcfour.c (arcfour_setkey): Likewise.
+ (do_arcfour_setkey): Likewise.
+ * blowfish.c (do_bf_setkey): Likewise.
+ (bf_setkey): Likewise.
+ * des.c (do_des_setkey): Likewise.
+ (do_tripledes_setkey): Likewise.
+
+2003-05-22 Moritz Schulte <moritz@g10code.com>
+
+ * tiger.c: Merged code using the U64_C macro from GnuPG.
+
+ * sha512.c: Likewise.
+
+2003-05-17 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_genkey): Fix typo: acquire a lock, instead of
+ releasing it.
+
+2003-05-11 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_testkey): Call REGISTER_DEFAULT_CIPHERS.
+ (gcry_pk_ctl): Likewise.
+
+2003-04-27 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (gcry_pk_genkey): Release sexp after extracted data has
+ been used.
+
+ * md.c (gcry_md_get_algo_dlen): Simplified, simply call
+ md_digest_length to do the job.
+
+ * des.c (do_des_setkey): Check for selftest failure not only
+ during initialization.
+ (do_tripledes_setkey): Include check for selftest failure.
+
+ * pubkey.c (gcry_pubkey_register_default): New macro
+ `pubkey_use_dummy', use it.
+
+ * elgamal.c (elg_names): New variable.
+ (pubkey_spec_elg): Include elg_names.
+
+ * dsa.c (dsa_names): New variable.
+ (pubkey_spec_dsa): Include dsa_names.
+
+ * rsa.c (rsa_names): New variable.
+ (pubkey_spec_rsa): Include rsa_names.
+
+ * pubkey.c (gcry_pubkey_lookup_func_name): Compare name also with
+ the names listed in `sexp_names'.
+
+2003-04-24 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (sexp_to_key): New variables: module, pubkey. Adjusted
+ to new module interface.
+ (sexp_to_key): Changed type of argument `retalgo' from `int *' to
+ `GcryModule **'. Adjusted all callers. Removed argument:
+ r_algotblidx.
+ (sexp_to_sig): Changed type of argument `retalgo' from `int *' to
+ `GcryModule **'. Adjusted all callers.
+ (sexp_to_enc): Likewise.
+
+ (pubkey_get_npkey, pubkey_get_nskey, pubkey_get_nsig,
+ pubkey_get_nenc): Use strlen to find out the number.
+
+ * rsa.c: Adjust pubkey_spec_rsa to new internal interface.
+ * dsa.c: Likewise.
+ * elgamal.c: Likewise.
+
+2003-04-17 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c (sexp_elements_extract): New function.
+ * pubkey.c (sexp_to_key): Removed variable `idx', added `err', use
+ sexp_elements_extract.
+ (sexp_to_sig): Likewise.
+ (sexp_to_enc): Likewise.
+
+ * pubkey.c: Terminate list correctly.
+ * md.c: Include sha512/sha384 in digest_table.
+
+2003-04-16 Moritz Schulte <moritz@g10code.com>
+
+ * Makefile.am: Include support for sha512.c.
+
+ * sha512.c: New file, merged from GnuPG, with few modifications
+ for libgcrypt.
+
+ * rand-internal.h: Removed declarations for constructor functions.
+
+ * md.c (md_copy): Call _gcry_module_use for incrementing the usage
+ counter of the digest modules.
+
+ * rsa.c: Do not include "rsa.h".
+ * dsa.c: Do not include "dsa.h".
+ * elgamal.c: Do not include "elgamal.h".
+ * des.c: Do not include "des.h".
+ * cast5.c: Do not include "cast5.h".
+ * blowfish.c: Do not include "blowfish.h".
+ * arcfour.c: Do not include "arcfour.h".
+
+ * Makefile.am (libcipher_la_DEPENDENCIES): Removed.
+ (libcipher_la_LIBADD): Removed.
+ Use Automake conditionals for conditional compilation.
+
+2003-04-13 Moritz Schulte <moritz@g10code.com>
+
+ * cipher.c (gcry_cipher_open): Call REGISTER_DEFAULT_CIPHERS.
+
+ * md.c (gcry_md_list): New member: module.
+ (md_enable): New variable: module, changed use of module and
+ digest.
+ (md_enable): Initialize member: module.
+ (md_close): Call _gcry_module_release.
+
+ * cipher.c (gcry_cipher_open): New variable: module, changed use of
+ module and cipher.
+ (struct gcry_cipher_handle): New member: module.
+ (gcry_cipher_open): Initialize member: module.
+ (gcry_cipher_close): Call _gcry_module_release.
+
+2003-04-09 Moritz Schulte <moritz@g10code.com>
+
+ * cipher.c: Include "ath.h".
+ * md.c: Likewise.
+ * pubkey.c: Likewise.
+
+ * cipher.c (ciphers_registered_lock): New variable.
+ * md.c (digests_registered_lock): New variable.
+ * pubkey.c (pubkeys_registered_lock): New variable.
+
+ * rndlinux.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rndlinux_constructor): Removed function.
+
+ * rndegd.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rndegd_constructor): Removed function.
+
+ * rndunix.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rndunix_constructor): Removed function.
+
+ * rndw32.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rndw32_constructor): Removed function.
+
+ * rndegd.c (rndegd_connect_socket): Simplify code for creating the
+ egd socket address.
+ (rndegd_connect_socket): Use log_fatal instead of
+ g10_log_fatal.
+ (egd_gather_random): Renamed to ...
+ (rndegd_gather_random): ... here.
+
+2003-04-08 Moritz Schulte <moritz@g10code.com>
+
+ * rndlinux.c: Do not include "dynload.h".
+ * rndunix.c: Likewise.
+ * rndw32.c: Likewise.
+
+ * rndegd.c (rndegd_connect_socket): Factored out from ...
+ (egd_gather_random): here; call it.
+ (egd_socket): New variable.
+ (egd_gather_random): Initialize fd with egd_socket, do not declare
+ fd static.
+ (do_read): Merged a few changes from GnuPG. FIXME - not finished?
+ Do not include "dynload.h".
+
+ * rndw32.c (gather_random): Renamed to rndw32_gather_random, do
+ not declare static.
+ (gather_random_fast): Renamed to rndw32_gather_random_fast, do not
+ declare static.
+
+ * rndunix.c (gather_random): Renamed to rndunix_gather_random, do
+ not declare static.
+ * rndegd.c (gather_random): Renamed to rndegd_gather_random, do
+ not declare static.
+ * rndlinux.c (gather_random): Renamed to rndlinux_gather_random,
+ do not declare static.
+
+2003-04-07 Moritz Schulte <moritz@g10code.com>
+
+ * Makefile.am (libcipher_la_SOURCES): Removed construct.c.
+ (libcipher_la_SOURCES): Added sha1.c, sha256.c, rmd160.c, md4.c,
+ md5.c, tiger.c and crc.c
+ (EXTRA_PROGRAMS): Removed sha1, sha256, rmd160, md4, md5, tiger
+ and crc. Removed definitions: EXTRA_md4_SOURCES,
+ EXTRA_md5_SOURCES, EXTRA_rmd160_SOURCES, EXTRA_sha1_SOURCES,
+ EXTRA_sha256_SOURCES, EXTRA_tiger_SOURCES and EXTRA_crc_SOURCES,
+ BUILT_SOURCES, DISTCLEANFILES.
+
+ * pubkey.c: Do not include "elgamal.h", "dsa.h" and "rsa.h".
+
+ * Makefile.am (libcipher_la_SOURCES): Removed rsa.h, elgamal.h,
+ dsa.h, des.h, cast5.h, arcfour.h and blowfish.h.
+
+ * rsa.h: Removed file.
+ * elgamal.h: Removed file.
+ * dsa.h: Removed file.
+ * des.h: Removed file.
+ * cast5.h: Removed file.
+ * arcfour.h: Removed file.
+ * blowfish.h: Removed file.
+
+ * Makefile.am (libcipher_la_SOURCES): Removed dynload.c and
+ dynload.h.
+
+ * rsa.c (pubkey_spec_rsa): New variable.
+ * dsa.c (pubkey_spec_dsa): New variable.
+ * elgamal.c (pubkey_spec_elg): New variable.
+
+ * rsa.c (_gcry_rsa_get_info): Removed function.
+ * elgamal.c (_gcry_elg_get_info): Removed function.
+ * dsa.c (_gcry_dsa_get_info): Removed function.
+
+ * tiger.c (tiger_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_tiger_constructor): Removed function.
+
+ * sha1.c (sha1_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_sha1_constructor): Removed function.
+
+ * sha256.c (sha256_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_sha256_constructor): Removed function.
+
+ * rmd160.c (rmd160_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rmd160_constructor): Removed function.
+
+ * md5.c (md5_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_md5_constructor): Removed function.
+
+ * md4.c (md4_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_md4_constructor): Removed function.
+
+ * crc.c (crc_get_info): Removed function.
+
+ * arcfour.c (do_arcfour_setkey): Changed type of context argument
+ to `void *', added local variable for cast, adjusted callers.
+ (arcfour_setkey): Likewise.
+ (encrypt_stream): Likewise.
+ * cast5.c (cast_setkey): Likewise.
+ (encrypt_block): Likewise.
+ * rijndael.c (rijndael_setkey): Likewise.
+ (rijndael_encrypt): Likewise.
+ (rijndael_decrypt): Likewise.
+ * twofish.c (twofish_setkey): Likewise.
+ (twofish_encrypt): Likewise.
+ (twofish_decrypt): Likewise.
+ * des.c (do_des_setkey): Likewise.
+ (do_des_encrypt): Likewise.
+ (do_des_decrypt): Likewise.
+ (do_tripledes_encrypt): Likewise.
+ (do_tripledes_decrypt): Likewise.
+ * blowfish.c (bf_setkey): Likewise.
+ (encrypt_block): Likewise.
+ (decrypt_block): Likewise.
+
+ * arcfour.c (encrypt_stream): Likewise.
+
+ * rijndael.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+
+ * twofish.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+
+ * cast5.c (CIPHER_ALGO_CAST5): Removed.
+
+ * blowfish.c (FNCCAST_SETKEY, FNCCAST_CRYPT): Removed macros.
+ (CIPHER_ALGO_BLOWFISH): Removed symbol.
+ * cast5.c (FNCCAST_SETKEY, FNCCAST_CRYPT): Likewise.
+ * des.c (selftest_failed): Removed.
+ (initialized): New variable.
+ (do_des_setkey): Run selftest, if not yet done.
+ (FNCCAST_SETKEY, FNCCAST_CRYPT): Removed macros.
+
+ * arcfour.c (_gcry_arcfour_get_info): Removed function.
+ * blowfish.c (_gcry_blowfish_get_info): Removed function.
+ * cast5.c (_gcry_cast5_get_info): Removed function.
+ * des.c (_gcry_des_get_info): Removed function.
+ * rijndael.c (_gcry_rijndael_get_info): Removed function.
+ * twofish.c (_gcry_twofish_get_info): Removed function.
+
+ * arcfour.c (cipher_spec_arcfour): New variable.
+ * twofish.c (cipher_spec_twofish, cipher_spec_twofish128): New
+ variables.
+ * rijndael.c (cipher_spec_aes, cipher_spec_aes192,
+ cipher_spec_aes256): New variables.
+ * des.c (cipher_spec_des, cipher_spec_tripledes): New variables.
+ * cast5.c (cipher_spec_cast5): New variable.
+ * blowfish.c (cipher_spec_blowfish): Likewise.
+
+ * twofish.c: Do not include "dynload.h".
+ * rijndael.c: Likewise.
+ * des.c: Likewise.
+ * cast5.c: Likewise.
+ * blowfish.c: Likewise.
+ * cipher.c: Likewise.
+ * crc.c: Likewise.
+ * md4.c: Likewise.
+ * md5.c: Likewise.
+ * md.c: Likewise.
+ * pubkey.c: Likewise.
+ * rijndael.c: Likewise.
+ * sha1.c: Likewise.
+ * sha256.c: Likewise.
+
+ * arcfour.c: Include "cipher.h".
+ * twofish.c: Likewise.
+ * rijndael.c: Likewise.
+ * des.c: Likewise.
+ * cast5.c: Likewise.
+ * blowfish.c: Likewise.
+
+ * twofish.c (twofish_setkey): Declared argument `key' const.
+ (twofish_encrypt): Declared argument `inbuf' const.
+ (twofish_decrypt): Likewise.
+
+ * rijndael.c (rijndael_setkey): Declared argument `key' const.
+ (rijndael_encrypt): Declared argument `inbuf' const.
+ (rijndael_decrypt): Likewise.
+
+ * des.c (do_des_setkey): Declared argument `key' const.
+ (do_tripledes_setkey): Likewise.
+ (do_des_encrypt): Declared argument `inbuf' const.
+ (do_des_decrypt): Likewise.
+ (do_tripledes_encrypt): Likewise.
+ (do_tripledes_decrypt): Likewise.
+
+ * cast5.c (encrypt_block): Declared argument `inbuf' const.
+ (decrypt_block): Likewise.
+ (cast_setkey): Declared argument `key' const.
+
+ * blowfish.c (do_bf_setkey): Declared argument `key' const.
+ (encrypt_block): Declared argument `inbuf' const.
+ (decrypt_block): Likewise.
+
+ * cipher.c: Remove CIPHER_ALGO_DUMMY related code.
+ Removed struct cipher_table_s.
+ Changed definition of cipher_table.
+ Removed definition of disabled_algos.
+ (ciphers_registered, default_ciphers_registered): New variables.
+ (REGISTER_DEFAULT_CIPHERS): New macro.
+ (dummy_setkey): Declared argument `key' const.
+ (dummy_encrypt_block): Declared argument `inbuf' const.
+ (dummy_decrypt_block): Likewise.
+ (dummy_encrypt_stream): Likewise.
+ (dummy_decrypt_stream): Likewise.
+ (dummy_setkey): Use `unsigned char' instead of `byte'.
+ (dummy_encrypt_block): Likewise.
+ (dummy_decrypt_block): Likewise.
+ (dummy_encrypt_stream): Likewise.
+ (dummy_decrypt_stream): Likewise.
+ (gcry_cipher_register_default): New function.
+ (gcry_cipher_lookup_func_id): New function.
+ (gcry_cipher_lookup_func_name): New function.
+ (gcry_cipher_lookup_id): New function.
+ (gcry_cipher_lookup_name): New function.
+ (gcry_cipher_id_new): New function.
+ (gcry_cipher_register): New function.
+ (gcry_cipher_unregister): New function.
+ (setup_cipher_table): Removed function.
+ (load_cipher_modules): Removed function.
+ (gcry_cipher_map_name): Adjusted to use new module management.
+ (cipher_algo_to_string): Likewise.
+ (disable_cipher_algo): Likewise.
+ (check_cipher_algo): Likewise.
+ (cipher_get_keylen): Likewise.
+ (cipher_get_blocksize): Likewise.
+ (gcry_cipher_open): Likewise.
+ (struct gcry_cipher_handle): Replaced members algo, algo_index,
+ blocksize, setkey, encrypt, decrypt, stencrypt, stdecrypt with one
+ member: cipher.
+ (gcry_cipher_open): Adjusted code for new handle structure.
+ (cipher_setkey): Likewise.
+ (cipher_setiv): Likewise.
+ (cipher_reset): Likewise.
+ (do_ecb_encrypt): Likewise.
+ (do_ecb_decrypt): Likewise.
+ (do_cbc_encrypt): Likewise.
+ (do_cbc_decrypt): Likewise.
+ (do_cfb_encrypt): Likewise.
+ (do_cfb_decrypt): Likewise.
+ (do_ctr_encrypt): Likewise.
+ (cipher_encrypt): Likewise.
+ (gcry_cipher_encrypt): Likewise.
+ (cipher_decrypt): Likewise.
+ (gcry_cipher_decrypt): Likewise.
+ (cipher_sync): Likewise.
+ (gcry_cipher_ctl): Likewise.
+
+ * pubkey.c: Removed struct pubkey_table_s.
+ Changed definition of pubkey_table.
+ Removed definition of disabled_algos.
+ (pubkeys_registered, default_pubkeys_registered): New variables.
+ (REGISTER_DEFAULT_PUBKEYS): New macro.
+ (setup_pubkey_table): Removed function.
+ (load_pubkey_modules): Removed function.
+ (gcry_pubkey_register_default): New function.
+ (gcry_pubkey_lookup_func_id): New function.
+ (gcry_pubkey_lookup_func_name): New function.
+ (gcry_pubkey_lookup_id): New function.
+ (gcry_pubkey_lookup_name): New function.
+ (gcry_pubkey_id_new): New function.
+ (gcry_pubkey_register): New function.
+ (gcry_pubkey_unregister): New function.
+ (gcry_pk_map_name): Adjusted to use new module management.
+ (gcry_pk_algo_name): Likewise.
+ (disable_pubkey_algo): Likewise.
+ (check_pubkey_algo): Likewise.
+ (pubkey_get_npkey): Likewise.
+ (pubkey_get_nskey): Likewise.
+ (pubkey_get_nsig): Likewise.
+ (pubkey_get_nenc): Likewise.
+ (pubkey_generate): Likewise.
+ (pubkey_check_secret_key): Likewise.
+ (pubkey_encrypt): Likewise.
+ (pubkey_decrypt): Likewise.
+ (pubkey_sign): Likewise.
+ (pubkey_verify): Likewise.
+ (gcry_pk_get_nbits): Likewise.
+ (gcry_pk_algo_info): Likewise.
+
+ * md.c: Removed struct md_digest_list_s.
+ (digest_list): Changed definition.
+ (digests_registered, default_digests_registered): New variables.
+ (REGISTER_DEFAULT_DIGESTS): New macro.
+ (new_list_item): Removed function.
+ (setup_md_table): Removed function.
+ (load_digest_module): Removed function.
+ (gcry_digest_register_default): New function.
+ (gcry_digest_lookup_func_id): New function.
+ (gcry_digest_lookup_func_name): New function.
+ (gcry_digest_lookup_id): New function.
+ (gcry_digest_lookup_name): New function.
+ (gcry_digest_id_new): New function.
+ (gcry_digest_register): New function.
+ (gcry_digest_unregister): New function.
+ (GcryDigestEntry): New type.
+ (struct gcry_md_context): Adjusted type of `list'.
+ (gcry_md_map_name): Adjusted to use new module management.
+ (digest_algo_to_string): Likewise.
+ (check_digest_algo): Likewise.
+ (md_enable): Likewise.
+ (md_digest_length): Likewise.
+ (md_asn_oid): Likewise.
+
+2003-04-07 Moritz Schulte <moritz@g10code.com>
+
+ * pubkey.c: Replaced PUBKEY_ALGO_DSA with GCRY_PK_DSA,
+ PUBKEY_ALGO_RSA with GCRY_PK_RSA and PUBKEY_ALGO_ELGAMAL with
+ GCRY_PK_ELG.
+
+ * dsa.c: Replaced PUBKEY_ALGO_DSA with GCRY_PK_DSA.
+
+2003-04-01 Moritz Schulte <moritz@g10code.com>
+
+ * des.c: Removed checks for GCRY_CIPHER_3DES and GCRY_CIPHER_DES.
+
+2003-03-31 Moritz Schulte <moritz@g10code.com>
+
+ * tiger.c (tiger_get_info): Do not declare static.
+ * sha256.c (sha256_get_info): Likewise.
+ * sha1.c (sha1_get_info): Likewise.
+ * rmd160.c (rmd160_get_info): Likewise.
+ * md5.c (md5_get_info): Likewise.
+ * md4.c (md4_get_info): Likewise.
+ * crc.c (crc_get_info): Likewise.
+
+ * md.c (load_digest_module): Call setup_md_table during
+ initialization.
+ (new_list_item): Link new element into digest_list.
+
+ * cipher.c (do_ctr_decrypt): Made do_ctr_decrypt act as a wrapper
+ for do_ctr_encrypt, since these functions are identical.
+
+2003-03-30 Simon Josefsson <jas@extundo.com>
+
+ * cipher.c (struct gcry_cipher_handle): Add counter field.
+ (gcry_cipher_open): Add CTR.
+ (cipher_reset): Clear counter field.
+ (do_ctr_encrypt, do_ctr_decrypt): New functions.
+ (cipher_encrypt, cipher_decrypt): Call CTR functions.
+ (gcry_cipher_ctl): Add SET_CTR to set counter.
+
+2003-03-30 Moritz Schulte <moritz@g10code.com>
+
+ * rsa.c (_gcry_rsa_blind): New function.
+ (_gcry_rsa_unblind): New function.
+ (_gcry_rsa_decrypt): Use _gcry_rsa_blind and _gcry_rsa_unblind.
+
+2003-03-26 Moritz Schulte <moritz@g10code.com>
+
+ * dynload.c (_gcry_enum_gnupgext_pubkeys): Adjust `encrypt' and
+ `decrypt' function arguments.
+ (_gcry_enum_gnupgext_pubkeys): Likewise.
+ * dynload.h: Likewise.
+
+ * pubkey.c (dummy_decrypt): Add argument: int flags.
+ (dummy_encrypt): Likewise.
+
+ * elgamal.c (_gcry_elg_encrypt): Add argument: int flags.
+ (_gcry_elg_decrypt): Likewise.
+
+ * rsa.c (_gcry_rsa_encrypt): Add argument: int flags.
+ (_gcry_rsa_decrypt): Likewise.
+
+ * pubkey.c: Add `flags' argument to members `encrypt' and
+ `decrypt' of struct `pubkey_table_s'.
+
+ * rsa.h: Add `flags' argument to function declarations.
+ * elgamal.h: Likewise.
+
+ * pubkey.c (sexp_data_to_mpi): New variable: int parsed_flags.
+ (sexp_data_to_mpi): Set `parsed_flags'.
+ (sexp_data_to_mpi): New argument: int *flags.
+ (gcry_pk_encrypt): New variable: int flags.
+ (gcry_pk_encrypt): Pass `flags' to pubkey_encrypt.
+ (pubkey_encrypt): New variable: int flags.
+ (pubkey_encrypt): Pass `flags' to pubkey encrypt function.
+ (pubkey_decrypt): Likewise.
+ (pubkey_decrypt): Pass `flags' to pubkey encrypt function.
+ (gcry_pk_encrypt): Include `flags' s-exp in return list.
+ (sexp_to_enc): New argument: int *flags.
+ (gcry_pk_decrypt): New variable: int flags.
+ (gcry_pk_decrypt): Pass `flags' to pubkey_decrypt.
+ (sexp_to_enc): New variable: int parsed_flags.
+ (sexp_to_enc): Set `parsed_flags'.
+
+2003-03-22 Simon Josefsson <jas@extundo.com>
+
+ * cipher.c (gcry_cipher_open, do_cbc_encrypt)
+ (gcry_cipher_encrypt): Support GCRY_CIPHER_CBC_MAC.
+ (gcry_cipher_ctl): Support GCRYCTL_SET_CBC_MAC.
+
+2003-03-19 Werner Koch <wk@gnupg.org>
+
+ * primegen.c (gen_prime): New args EXTRA_CHECK and EXTRA_CHECK_ARG
+ to allow for a user callback. Changed all callers.
+ (_gcry_generate_secret_prime)
+ (_gcry_generate_public_prime): Ditto, pass them to gen_prime.
+ * rsa.c (check_exponent): New.
+ (generate): Use a callback to ensure that a given exponent is
+ actually generated.
+
+2003-03-12 Moritz Schulte <moritz@g10code.com>
+
+ * primegen.c: Initialize `no_of_small_prime_numbers' statically.
+ (gen_prime): Remove calculation of `no_of_small_prime_numbers'.
+
+2003-03-03 Moritz Schulte <moritz@g10code.com>
+
+ * md.c (gcry_md_ctl): Rewritten to use the same style as the other
+ function dispatchers.
+
+2003-03-02 Moritz Schulte <moritz@g10code.com>
+
+ * cipher.c (struct gcry_cipher_handle): New member: algo_index.
+ (gcry_cipher_open): Allocate memory for two cipher contexts.
+ Initialize algo_index.
+ (cipher_setkey): Duplicate context into reserved memory.
+ (cipher_reset): New function, which resets the context and clears
+ the IV.
+ (gcry_cipher_ctl): Call cipher_reset.
+
+2003-02-23 Moritz Schulte <moritz@g10code.com>
+
+ * cipher.c: Remove (bogus) `digitp' macro definition.
+ * md.c: Likewise.
+
+ * blowfish.c (burn_stack): Removed.
+ * arcfour.c (burn_stack): Likewise.
+ * cast5.c (burn_stack): Likewise.
+ * des.c (burn_stack): Likewise.
+ * md4.c (burn_stack): Likewise.
+ * md5.c (burn_stack): Likewise.
+ * random.c (burn_stack): Likewise.
+ * rijndael.c (burn_stack): Likewise.
+ * rmd160.c (burn_stack): Likewise.
+ * sha1.c (burn_stack): Likewise.
+ * sha256.c (burn_stack): Likewise.
+ * tiger.c (burn_stack): Likewise.
+ * twofish.c (burn_stack): Likewise.
+
+ * blowfish.c: Changed all occurrences of burn_stack to
+ _gcry_burn_stack.
+ * arcfour.c: Likewise.
+ * cast5.c: Likewise.
+ * des.c: Likewise.
+ * md4.c: Likewise.
+ * md5.c: Likewise.
+ * random.c: Likewise.
+ * rijndael.c: Likewise.
+ * rmd160.c: Likewise.
+ * sha1.c: Likewise.
+ * sha256.c: Likewise.
+ * tiger.c: Likewise.
+ * twofish.c: Likewise.
+
+ * arcfour.c (_gcry_arcfour_get_info): Use GCRY_CIPHER_ARCFOUR
+ instead of hard-coded value `301'.
+
+2003-01-24 Werner Koch <wk@gnupg.org>
+
+ * random.c (_gcry_register_random_progress): New.
+ (_gcry_random_progress): New.
+
+ * rndlinux.c (gather_random): Call the random progress function.
+
+2003-01-23 Werner Koch <wk@gnupg.org>
+
+ * rsa.c (generate): New arg USE_E to request a specific public
+ exponent.
+ (_gcry_rsa_generate): Ditto.
+ * elgamal.c (_gcry_elg_generate): Must add a dummy argument
+ instead of USE_E.
+ * dsa.c (_gcry_dsa_generate): Ditto.
+ * pubkey.c (dummy_generate): Ditto.
+ (pubkey_generate): Add USE_E arg and pass it down.
+ (gcry_pk_genkey): Detect "rsa-use-e" parameter and pass it to generate.
+
+ * pubkey.c (sexp_to_enc): New arg RET_MODERN.
+ (gcry_pk_decrypt): Make use of it to return a real S-expression.
+ Return better error codes.
+ (gcry_pk_verify): Return better error codes.
+
+2003-01-21 Werner Koch <wk@gnupg.org>
+
+ * random.c (gcry_random_add_bytes): Add QUALITY argument, let
+ function return an error code and disable its core for now.
+
+2003-01-21 Timo Schulz <twoaday@freakmail.de>
+
+ * random.c (gcry_random_add_bytes): New. Function to add external
+ random to the pool.
+
+2003-01-20 Simon Josefsson <jas@extundo.com>
+
+ * crc.c: New.
+ * Makefile.am (EXTRA_PROGRAMS, EXTRA_crc_SOURCES): Add crc.c.
+ * md.c (gcry_md_get_algo_dlen): Add values for CRC.
+
+2003-01-20 Werner Koch <wk@gnupg.org>
+
+ * sha256.c: New.
+ * bithelp.h (ror): New.
+ * Makefile.am: Add sha256.c.
+ * md.c (oid_table): Add values for SHA256 et al.
+ (gcry_md_get_algo_dlen): Likewise.
+
+2003-01-20 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (gcry_pk_get_keygrip): Implemented keygrips for DSA
+ and ElGamal.
+
+2003-01-17 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (gcry_cipher_encrypt): Reworked so that the output will
+ never contain the plaintext even if the caller did not check the
+ return value.
+
+ * md.c (gcry_md_get_algo): Changed error code to GCRYERR_GENERAL
+ because the problem is not an invalid md algo but that no
+ algorithm is enabled.
+
+ * pubkey.c (gcry_pk_genkey): Changed error code for bounds check
+ of table parameters to GCRYERR_INTERNAL.
+
+ * md.c (gcry_md_open): Partly reverted Timo's change from
+ 2002-10-10 by removing the check for the algorithm. An algorithm
+ of 0 is allowed and anyway we should not double check it or check
+ it using a different function. Also fixed the flags check.
+
+ * pubkey.c (gcry_pk_encrypt): Make sure that R_CIPH points to NULL
+ on error.
+ (gcry_pk_decrypt): Ditto for R_PLAIN.
+ (gcry_pk_sign): Ditto for R_SIG.
+ (gcry_pk_genkey): Ditto for R_KEY.
+
+2003-01-16 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_write): Changed 2nd argument type to void*.
+ (gcry_md_hash_buffer): Changed type of both buffers to void*.
+ (gcry_md_setkey): Changed 2nd argument type to void*.
+
+2003-01-15 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (sexp_data_to_mpi): New. This handles pkcs1 padding.
+ (gcry_pk_sign, gcry_pk_verify): Use it here.
+ (gcry_pk_encrypt): And here.
+ (pubkey_verify): Add debug code.
+ (sexp_to_enc): Handle flags in the input and return the pkcs1 flag
+ in a new parameter.
+ (gcry_pk_decrypt): Prepare for future pkcs1 handling.
+
+2002-12-19 Werner Koch <wk@gnupg.org>
+
+ * random.c (_gcry_random_initialize): New.
+
+2002-12-16 Werner Koch <wk@gnupg.org>
+
+ * cipher.c: Added a Teletrust specific OID for 3DES.
+
+2002-12-12 Werner Koch <wk@gnupg.org>
+
+ * md.c: Added another oddball OIW OID (sha-1WithRSAEncryption).
+
+2002-11-23 Werner Koch <wk@gnupg.org>
+
+ * md.c (load_digest_module): Enlarged checked_algos bitmap.
+ * md4.c (func_table): Fixed entry for md4.
+ Both by Simon Josefsson.
+ (transform): Copy data to get the alignment straight. Tested only
+ on i386.
+
+2002-11-10 Simon Josefsson <jas@extundo.com>
+
+ * cipher.c (gcry_cipher_open): Don't reject CTS flag.
+ (do_cbc_encrypt, do_cbc_decrypt, cipher_encrypt)
+ (gcry_cipher_encrypt, cipher_decrypt)
+ (gcry_cipher_decrypt): Support CTS flag.
+ (gcry_cipher_ctl): Toggle CTS flag.
+
+2002-11-10 Werner Koch <wk@gnupg.org>
+
+ * md4.c: New. By Simon Josefsson.
+ * Makefile.am (EXTRA_PROGRAMS): Add md4.c.
+ * md.c (oid_table,gcry_md_get_algo_dlen): MD4 support.
+
+2002-10-14 Werner Koch <wk@gnupg.org>
+
+ * arcfour.c (do_encrypt_stream): Don't use increment op when
+ assigning to the same variable.
+
+2002-10-10 Timo Schulz <ts@winpt.org>
+
+ * pubkey.c (gcry_pk_genkey): Check boundaries.
+
+ * md.c (gcry_md_open): Check that algo is available and only
+ valid flag values are used.
+ (gcry_md_get_algo): Add error handling.
+
+2002-09-26 Werner Koch <wk@gnupg.org>
+
+ * md.c: Include an OID for TIGER.
+ * tiger.c (tiger_get_info): Use a regular OID.
+
+2002-09-17 Werner Koch <wk@gnupg.org>
+
+ * random.c: Replaced mutex.h by the new ath.h. Changed all calls.
+
+2002-09-16 Werner Koch <wk@gnupg.org>
+
+ * arcfour.c (do_encrypt_stream): Use register modifier and modulo.
+ According to Nikos Mavroyanopoulos this increases performance on
+ i386 systems noticeably. And I always thought gcc was clever enough.
+ * md5.c (transform): Use register modifier.
+ * rmd160.c (transform): Ditto.
+ * sha1.c (transform): Ditto. We hope that there are 6 free registers.
+ * random.c (gcry_randomize): Rewrote to avoid malloc calls.
+
+ * rndlinux.c (gather_random): Replaced remaining fprintfs by log_*.
+ * arcfour.c (do_arcfour_setkey): Ditto.
+ * twofish.c (do_twofish_setkey): Ditto.
+ * rndegd.c (gather_random): Ditto.
+ * rijndael.c (do_setkey): Ditto.
+ * random.c (_gcry_random_dump_stats): Ditto.
+ * primegen.c (_gcry_generate_elg_prime): Ditto.
+ * des.c (_gcry_des_get_info): Ditto.
+ * cast5.c (do_cast_setkey): Ditto.
+ * blowfish.c (do_bf_setkey): Ditto.
+
+2002-08-26 Werner Koch <wk@gnupg.org>
+
+ * des.c (weak_keys): Fixed one entry in the table and compared
+ all entries against the literature.
+ (selftest): Checksum the weak key table.
+
+2002-08-21 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c: Enable keygrip calculation for "openpgp-rsa".
+
+2002-08-17 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (setup_cipher_table): Don't overwrite the DES entry
+ with the entry for DUMMY.
+
+2002-08-14 Werner Koch <wk@gnupg.org>
+
+ * des.c (do_des_setkey,do_des_encrypt, do_des_decrypt): New.
+ (_gcry_des_get_info): Support plain old DES.
+ * cipher.c (setup_cipher_table): Put DES into the table.
+
+2002-07-25 Werner Koch <wk@gnupg.org>
+
+ * rndunix.c (_gcry_rndunix_constructor): Prefixed with _gcry_.
+ Noted by Stephan Austermuehle.
+
+2002-07-08 Timo Schulz <ts@winpt.org>
+
+ * rndw32.c: Replaced the m_ memory functions with the real
+ gcry_ functions. Renamed all g10_ prefixed functions to log_.
+
+2002-06-12 Werner Koch <wk@gnupg.org>
+
+ * rsa.c (generate): Use e = 65537 for now.
+
+2002-06-11 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (gcry_pk_get_keygrip): Allow a "protected-private-key".
+
+2002-06-05 Timo Schulz <ts@winpt.org>
+
+ * cipher.c (gcry_cipher_encrypt, gcry_cipher_decrypt):
+ Check that the input size is a multiple of the blocksize.
+
+2002-05-23 Werner Koch <wk@gnupg.org>
+
+ * md.c (oid_table): Add an rsadsi OID for MD5.
+
+2002-05-21 Werner Koch <wk@gnupg.org>
+
+ * primegen.c, elgamal.c, dsa.c (progress): Do not print anything
+ by default. Pass an extra identifying string to the callback and
+ reserved 2 arguments for current and total counters. Changed the
+ register function prototype.
+
+2002-05-17 Werner Koch <wk@gnupg.org>
+
+ * rndegd.c (rndegd_constructor): Fixed name of register function
+ and prefixed the function name with _gcry_.
+ * rndw32.c (rndw32_constructor): Ditto.
+ * tiger.c (tiger_constructor): Ditto.
+
+ * Makefile.am: Removed all dynamic loading stuff.
+ * dynload.c: Ditto. Now only used for the constructor system.
+
+2002-05-15 Werner Koch <wk@gnupg.org>
+
+ * random.c (gcry_random_bytes,gcry_random_bytes_secure)
+ (gcry_randomize): Make sure we are initialized.
+
+2002-05-14 Werner Koch <wk@gnupg.org>
+
+ Changed license of most files to the LGPL.
+
+2002-05-02 Werner Koch <wk@gnupg.org>
+
+ * random.c (_gcry_fast_random_poll): Initialize the module so the
+ mutex can be used.
+
+ * primegen.c (small_prime_numbers): Moved table from smallprime.c
+ * smallprime.c: File removed.
+
+ * des.c (leftkey_swap, rightkey_swap, working_memcmp): Made static.
+
+ * cipher.c (gcry_cipher_map_name): Map "RIJNDAEL" to "AES".
+ * rijndael.c (rijndael_get_info): We only support a 128 bit
+ blocksize, so it makes sense to change the algorithm strings to
+ AES.
+
+ * tiger.c (tiger_final): Removed superfluous token pasting operators.
+ * md5.c (md5_final): Ditto.
+
+2002-04-30 Werner Koch <wk@gnupg.org>
+
+ * cipher.c: Fixed list of copyright years.
+
+2002-03-18 Werner Koch <wk@gnupg.org>
+
+ * random.c (initialize): Initialize the new pool lock mutex.
+ (_gcry_fast_random_poll): Add locking and moved main
+ code out to...
+ (do_fast_random_poll): new function.
+ (read_pool): Use the new function here.
+ (get_random_bytes): Add locking.
+ (_gcry_update_random_seed_file): Ditto.
+
+2002-03-11 Werner Koch <wk@gnupg.org>
+
+ * md.c: Add rsaSignatureWithripemd160 to OID table.
+
+2002-02-20 Werner Koch <wk@gnupg.org>
+
+ * sha1.c: Removed a left over comment note. The code has been
+ rewritten from scratch in 1998. Thanks to Niels Möller for
+ reporting this misleading comment.
+
+2002-02-18 Werner Koch <wk@gnupg.org>
+
+ * rndunix.c (rndunix_constructor): Use the new prefixed
+ function name. Reported by Jordi Mallach.
+
+2002-02-10 Werner Koch <wk@gnupg.org>
+
+ * random.c (mix_pool): Carry an extra failsafe_digest buffer
+ around to make the function more robust.
+
+2002-02-08 Werner Koch <wk@gnupg.org>
+
+ * random.c (add_randomness): XOR new data into the pool and not
+ just copy it. This avoids any chosen-input attacks, which are not
+ serious in our setting because an outsider won't be able to mix
+ data in and even then we keep going with a PRNG. Thanks to Stefan
+ Keller for pointing this out.
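+
+ Illustration: folding incoming bytes into the pool with XOR (instead
+ of overwriting) means externally supplied data can never erase
+ entropy that is already there.  A minimal sketch of the idea, not the
+ libgcrypt pool code; POOLSIZE and the pool layout are simplified:
+
+   #include <stddef.h>
+
+   #define POOLSIZE 600                  /* illustrative size only */
+
+   static unsigned char pool[POOLSIZE];
+   static size_t pool_pos;
+
+   /* Fold LEN externally supplied bytes into the pool.  */
+   static void
+   add_external_randomness (const void *buffer, size_t len)
+   {
+     const unsigned char *p = buffer;
+
+     while (len--)
+       {
+         pool[pool_pos++] ^= *p++;       /* XOR, never a plain copy */
+         if (pool_pos >= POOLSIZE)
+           pool_pos = 0;
+       }
+   }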
+
+2002-01-04 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (gcry_pk_genkey): Do not release skey - it is static.
+
+ * primegen.c (gen_prime): Of course we should use set_bit
+ and not set_highbit to set the second high bit.
+
+2001-12-18 Werner Koch <wk@gnupg.org>
+
+ * rsa.c (generate): Loop until we find the exact modulus size.
+ Changed the exponent to 41.
+ (rsa_get_info): s/usage/r_usage/ to avoid shadow warnings.
+ * primegen.c (gen_prime): Set 2 high order bits for secret primes.
+
+ * Makefile.am (DISTCLEANFILES): Include construct.c.
+
+2001-12-17 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (gcry_pk_get_keygrip): New - experimental.
+
+2001-12-11 Werner Koch <wk@gnupg.org>
+
+ * cipher.c: Added OIDs for AES.
+ (gcry_cipher_mode_from_oid): New.
+ (gcry_cipher_map_name): Moved OID search code to ..
+ (search_oid): .. new function.
+
+2001-12-10 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (gcry_pk_encrypt): Find the signature algorithm by name
+ and not by number.
+
+ * pubkey.c (gcry_pk_encrypt,gcry_pk_decrypt,gcry_pk_sign)
+ (gcry_pk_verify,gcry_pk_testkey, gcry_pk_genkey)
+ (gcry_pk_get_nbits): Release the arrays. Noted by Nikos
+ Mavroyanopoulos.
+
+2001-12-06 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (gcry_cipher_map_name): Look also for OIDs prefixed
+ with "oid." or "OID.".
+
+2001-12-05 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c (algo_info_table): Fixed entry for openpgp-rsa.
+
+2001-11-24 Werner Koch <wk@gnupg.org>
+
+ * pubkey.c: Added the rsaEncryption OID to the tables.
+ (sexp_to_key): Add an arg to return the index of the algorithm,
+ changed all callers.
+ (gcry_pk_sign): Find the signature algorithm by name and not by
+ number.
+ (gcry_pk_get_nbits): Fixed so that we can now really pass a secret
+ key to get the result.
+
+ * md.c (gcry_md_map_name): Look also for OIDs prefixed with "oid."
+ or "OID." so that an OID string can be used as an S-Exp token.
+
+2001-11-20 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_map_name): Lookup by OID if the name begins
+ with a digit.
+ (oid_table): New.
+
+2001-11-16 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_info): New operator GCRYCTL_IS_ALGO_ENABLED.
+
+2001-11-07 Werner Koch <wk@gnupg.org>
+
+ * md.c (gcry_md_hash_buffer): Close the handle which was left open
+ for algorithms other than rmd160.
+
+2001-08-08 Werner Koch <wk@gnupg.org>
+
+ * rndw32.c (gather_random): Use toolhelp in addition to the NT
+ gatherer for Windows2000. Suggested by Sami Tolvanen.
+
+ * random.c (read_pool): Fixed length check, this used to be one
+ byte too strict. Made an assert out of it because the caller has
+ already made sure that only poolsize bytes are requested.
+ Reported by Marcus Brinkmann.
+
+2001-08-03 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (cipher_encrypt, cipher_decrypt): Prepare to return
+ errors. We have to change the interface to all ciphers to make
+ this really work but we should do so to prepare for hardware
+ encryption modules.
+ (gcry_cipher_encrypt, gcry_cipher_decrypt): Return the error and
+ set lasterr.
+ (gcry_cipher_ctl): Make sure that errors from setkey are returned.
+
+2001-08-02 Werner Koch <wk@gnupg.org>
+
+ * rndlinux.c (gather_random): Cast a size_t arg to int so that
+ the format string is correct. Casting is okay here and avoids
+ translation changes.
+
+ * random.c (fast_random_poll): Do not check the return code of
+ getrusage.
+
+ * rndunix.c: Add a signal.h header to avoid warnings on Solaris 7
+ and 8.
+
+ * tiger.c (print_abc,print_data): Removed.
+
+ * rijndael.c, des.c, blowfish.c, twofish.c, cast5.c, arcfour.c
+ (burn_stack): New. Add wrappers for most functions to be able to
+ call burn_stack after the function invocation (see the sketch
+ after this entry). This method seems to be the most portable way
+ to zeroise the stack used. It only works on stack-frame based
+ machines but it is highly portable and has no side effects. Just
+ setting the automatic variables at the end of a function to zero
+ does not work well because the compiler will optimize them away -
+ marking them as volatile would be bad for performance.
+ * md5.c, sha1.c, rmd160.c, tiger.c (burn_stack): Likewise.
+ * random.c (burn_stack): New.
+ (mix_pool): Use it here to burn the stack of the mixblock function.
+
+ * primegen.c (_gcry_generate_elg_prime): Freed q at 3 places.
+ Thanks to Tommi Komulainen.
+
+ * arcfour.c (arcfour_setkey): Check the minimum key length against
+ bytes and not bits.
+ (selftest): Must reset the key before decryption.
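+
+ Sketch of the burn_stack idea mentioned above: after a sensitive
+ routine returns, a helper reuses roughly the same stack region with a
+ local buffer and overwrites it.  This is only an illustration of the
+ technique, not the actual _gcry_burn_stack implementation:
+
+   /* Overwrite about BYTES bytes of stack by recursing with a small
+      local buffer; volatile keeps the compiler from removing the
+      stores.  */
+   static void
+   burn_stack (int bytes)
+   {
+     volatile char buf[64];
+     unsigned int i;
+
+     for (i = 0; i < sizeof buf; i++)
+       buf[i] = 0;
+     bytes -= (int) sizeof buf;
+     if (bytes > 0)
+       burn_stack (bytes);
+   }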
+
+2001-05-31 Werner Koch <wk@gnupg.org>
+
+ * sha1.c (sha1_init): Made static.
+
+ Changed all g10_ prefixed function names as well as some mpi_
+ function names to cope with the introduced naming changes.
+
+ * md.c (prepare_macpads): Made key const.
+
+2001-05-28 Werner Koch <wk@gnupg.org>
+
+ * rndegd.c (gather_random): Removed the use of tty_printf.
+
+2001-03-29 Werner Koch <wk@gnupg.org>
+
+ * md5.c (md5_final): Fixed calculation of hashed length. Thanks
+ to disastry@saiknes.lv for pointing out that it was horribly wrong
+ for more than 512MB of input.
+ * sha1.c (sha1_final): Ditto.
+ * rmd160.c (rmd160_final): Ditto.
+ * tiger.c (tiger_final): Ditto.
+
+ * blowfish.c (encrypt,do_encrypt): Changed name to do_encrypt to
+ avoid name clashes with an encrypt function in stdlib.h of
+ Dynix/PIX. Thanks to Gene Carter.
+ * elgamal.c (encrypt,do_encrypt): Ditto.
+
+ * twofish.c (gnupgext_enum_func): Use only when compiled as a
+ module.
+ * rijndael.c (gnupgext_enum_func): Ditto.
+
+ * tiger.c (tiger_get_info): Return "TIGER192" and not just
+ "TIGER". By Edwin Woudt.
+
+ * random.c: Always include time.h - standard requirement. Thanks
+ to James Troup.
+
+ * rndw32.c: Fixes to the macros.
+
+2001-01-11 Werner Koch <wk@gnupg.org>
+
+ * cipher.c (cipher_encrypt,gcry_cipher_encrypt): Use blocksize and
+ not 8.
+
+2000-12-19 Werner Koch <wk@gnupg.org>
+
+ Major change:
+ Removed all GnuPG stuff and renamed this piece of software
+ to gcrypt.
+
+2000-11-14 Werner Koch <wk@gnupg.org>
+
+ * dsa.c (test_keys): Replaced mpi_alloc by gcry_mpi_new and
+ mpi_free by gcry_mpi_release.
+ * elgamal.c (test_keys,generate): Ditto, also for mpi_alloc_secure.
+ * rsa.c (test_keys,generate,rsa_verify): Ditto.
+ * primegen.c (generate_elg_prime): Ditto.
+ (gen_prime): Ditto and removed nlimbs.
+
+ * rsa.c (generate): Allocate 2 more vars in secure memory.
+
+ * Makefile.am (OMIT_DEPENDENCIES): Hack to work around dependency
+ problems.
+
+2000-10-09 Werner Koch <wk@gnupg.org>
+
+ * arcfour.c, arcfour.h: New.
+ * cipher.c (cipher_encrypt, cipher_decrypt): Add stream mode.
+ (setup_cipher_table): Add Arcfour.
+ (gcry_cipher_open): Kludge to allow stream mode.
+
+Wed Oct 4 13:16:18 CEST 2000 Werner Koch <wk@openit.de>
+
+ * sha1.c (transform): Use rol() macro. Actually this is not needed
+ for a newer gcc but there are still other compilers.
+
+ * rsa.c (test_keys): Use new random function.
+
+ * md.c (gcry_md_setkey): New function to overcome problems with
+ const conflicts.
+ (gcry_md_ctl): Pass set key to the new functions.
+
+ * rijndael.c: New.
+ * cipher.c: Add Rijndael support.
+
+Mon Sep 18 16:35:45 CEST 2000 Werner Koch <wk@openit.de>
+
+ * rndlinux.c (open_device): Loosen random device checking.
+ By Nils Ellmenreich.
+
+ * random.c (fast_random_poll): Check ENOSYS for getrusage.
+ * rndunix.c: Add 2 sources for QNX. By Sam Roberts.
+
+ * pubkey.c (gcry_pk_algo_info): Add GCRYCTL_GET_ALGO_USAGE.
+
+ * rsa.c: Changed the comment about the patent.
+ (secret): Speed up by using the CRT. For a 2k key this is
+ about 3 times faster (see the sketch after this entry).
+ (stronger_key_check): New but unused code to check the secret key.
+ * Makefile.am: Included rsa.[ch].
+ * pubkey.c: Enabled RSA support.
+ (pubkey_get_npkey): Removed RSA workaround.
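+
+ Sketch of the CRT speed-up mentioned above: the private operation
+ c^d mod n can be split into two half-size exponentiations mod p and
+ mod q and recombined, which is where the factor of about 3 comes
+ from for large keys.  A toy demonstration with small illustrative
+ parameters, not the libgcrypt MPI code:
+
+   #include <stdio.h>
+   #include <stdint.h>
+
+   static uint64_t
+   modpow (uint64_t b, uint64_t e, uint64_t n)
+   {
+     uint64_t r = 1;
+     b %= n;
+     while (e)
+       {
+         if (e & 1)
+           r = r * b % n;
+         b = b * b % n;
+         e >>= 1;
+       }
+     return r;
+   }
+
+   int
+   main (void)
+   {
+     /* Toy parameters (p=61, q=53); real keys use multi-precision
+        numbers.  */
+     uint64_t p = 61, q = 53, n = p * q, e = 17, d = 2753;
+     uint64_t c = modpow (42, e, n);            /* some ciphertext */
+
+     /* Straightforward private operation: one exponentiation mod n.  */
+     uint64_t m_plain = modpow (c, d, n);
+
+     /* CRT: two half-size exponentiations, then Garner recombination.
+        q_inv is q^-1 mod p: 38*53 = 2014 = 33*61 + 1.  */
+     uint64_t dp = d % (p - 1), dq = d % (q - 1), q_inv = 38;
+     uint64_t mp = modpow (c % p, dp, p);
+     uint64_t mq = modpow (c % q, dq, q);
+     uint64_t h = q_inv * ((mp + p - mq % p) % p) % p;
+     uint64_t m_crt = mq + h * q;
+
+     printf ("plain=%llu  crt=%llu\n",
+             (unsigned long long) m_plain, (unsigned long long) m_crt);
+     return 0;
+   }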
+
+Mon Jul 31 10:04:47 CEST 2000 Werner Koch <wk@openit.de>
+
+ * pubkey.c: Replaced all gcry_sexp_{car,cdr}_{data,mpi} by the new
+ gcry_sexp_nth_{data,mpi} functions.
+
+Tue Jul 25 17:44:15 CEST 2000 Werner Koch <wk@openit.de>
+
+ * pubkey.c (sexp_to_key,sexp_to_sig,sexp_to_enc,gcry_pk_encrypt,
+ gcry_pk_decrypt,gcry_pk_sign,gcry_pk_genkey): Changed to work with
+ the new S-Exp interface.
+
+Mon Jul 17 16:35:47 CEST 2000 Werner Koch <wk@>
+
+ * random.c (gather_faked): Replaced make_timestamp by time(2) again.
+
+Fri Jul 14 19:38:23 CEST 2000 Werner Koch <wk@>
+
+ * md.c (gcry_md_ctl): Support GCRYCTL_{START,STOP}_DUMP.
+
+ * Makefile.am: Never compile mingw32 as module.
+
+ * Makefile.am: Tweaked module build and removed libtool
+
+ * Makefile.am: Replaced -O1 by -O. Suggested by Alec Habig.
+
+ * elgamal.c (sign): Removed inactive code.
+
+ * rsa.c, rsa.h: New based on the old module version (only in CVS for now).
+ * pubkey.c (setup_pubkey_table): Added commented support for RSA.
+
+ * rndunix.c (waitpid): New. For UTS 2.1. All by Dave Dykstra.
+ (my_popen): Do the FD_CLOEXEC only if it is available
+ (start_gatherer): Cope with missing _SC_OPEN_MAX
+
+ * rndunix.c: Add some more headers for QNX. By Sam Roberts.
+
+ * rndegd.c (gather_random): Shortcut level 0.
+ * rndunix.c (gather_random): Ditto.
+ * rndw32.c (gather_random): Ditto.
+
+ * rndw32.c: Replaced with code from Cryptlib and commented the old stuff.
+ * rndw32.c: Add some debugging code enabled by an environment variable.
+
+ * random.c (read_seed_file): Binary open for DOSish system
+ (update_random_seed_file): Ditto.
+ * random.c [MINGW32]: Include process.h for getpid.
+ * random.c (fast_random_poll): Add clock_gettime() as fallback for
+ systems which support this POSIX.4 function. By Sam Roberts.
+
+ * random.c (read_seed_file): Removed the S_ISLNK test because it
+ is already covered by !S_ISREG and is not defined in Unixware.
+ Reported by Dave Dykstra.
+ (update_random_seed_file): Silently ignore update request when pool
+ is not filled.
+
+ * random.c (read_seed_file): New.
+ (set_random_seed_file): New.
+ (read_pool): Try to read the seeding file.
+ (update_random_seed_file): New.
+
+ (read_pool): Do an initial extra seeding when level 2 quality random
+ is requested the first time. This requests at least POOLSIZE/2 bytes
+ of entropy. Combined with the seeding file this should make normal
+ random bytes cheaper and increase the quality of the random bytes
+ used for key generation.
+
+ * random.c (read_pool): Print a more friendly error message in
+ cases when too much random is requested in one call.
+
+ * random.c (fast_random_poll): Check whether RUSAGE_SELF is defined;
+ this is not the case for some ESIX and Unixware, although they have
+ getrusage().
+
+ * primegen.c (generate_elg_prime): All primes are now generated with
+ the lowest random quality level. Because they are public anyway we
+ don't need stronger random and by this we do not drain the system's
+ entropy so much.
+
+ * primegen.c (register_primegen_progress): New.
+ * dsa.c (register_pk_dsa_progress): New.
+ * elgamal.c (register_pk_elg_progress): New.
+
+ * elgamal.c (wiener_map): New.
+ (gen_k): Use a much smaller k.
+ (generate): Calculate the qbits using the wiener map and
+ choose an x at a size comparable to the one chosen in gen_k.
+
+ * rmd160.c (rmd160_get_info): Moved casting to the left side due to a
+ problem with UTS4.3. Suggested by Dave Dykstra.
+ * sha1.c (sha1_get_info): Ditto.
+ * tiger.c (tiger_get_info): Ditto.
+ * md5.c (md5_get_info): Ditto
+ * des.c (des_get_info): Ditto.
+ * blowfish.c (blowfish_get_info): Ditto.
+ * cast5.c (cast5_get_info): Ditto.
+ * twofish.c (twofish_get_info): Ditto.
+
+Fri Mar 24 11:25:45 CET 2000 Werner Koch <wk@openit.de>
+
+ * md.c (md_open): Add hmac arg and allocate space for the pads.
+ (md_finalize): Add HMAC support.
+ (md_copy): Ditto.
+ (md_close): Ditto.
+ (gcry_md_reset): Ditto.
+ (gcry_md_ctl): Ditto.
+ (prepare_macpads): New.
+
+Mon Mar 13 19:22:46 CET 2000 Werner Koch <wk@openit.de>
+
+ * md.c (gcry_md_hash_buffer): Add support for the other algorithms.
+
+Mon Jan 31 16:37:34 CET 2000 Werner Koch <wk@gnupg.de>
+
+ * primegen.c (generate_elg_prime): Fixed returned factors which never
+ worked for non-DSA keys.
+
+Thu Jan 27 18:00:44 CET 2000 Werner Koch <wk@gnupg.de>
+
+ * pubkey.c (sexp_to_key): Fixed mem leaks in case of errors.
+
+Mon Jan 24 22:24:38 CET 2000 Werner Koch <wk@gnupg.de>
+
+ * pubkey.c (gcry_pk_decrypt): Implemented.
+ (gcry_pk_encrypt): Implemented.
+ (gcry_pk_testkey): New.
+ (gcry_pk_genkey): New.
+ (pubkey_decrypt): Made static.
+ (pubkey_encrypt): Ditto.
+ (pubkey_check_secret_key): Ditto.
+ (pubkey_generate): Ditto.
+
+Mon Jan 24 13:04:28 CET 2000 Werner Koch <wk@gnupg.de>
+
+ * pubkey.c (pubkey_nbits): Removed and replaced by ...
+ (gcry_pk_get_nbits): this new one.
+
+Wed Dec 8 21:58:32 CET 1999 Werner Koch <wk@gnupg.de>
+
+ * dsa.c: s/mpi_powm/gcry_mpi_powm/g
+ * elgamal.c: Ditto.
+ * primegen.c: Ditto.
+
+ * : Replaced g10_opt_verbose by g10_log_verbosity().
+
+ * Makefile.am (INCLUDES): removed intl, add ../gcrypt
+
+Fri Nov 19 17:15:20 CET 1999 Werner Koch <wk@gnupg.de>
+
+ * dynload.c (cmp_filenames): New to replace compare_filename() in
+ module.
+ (register_cipher_extension): Removed the tilde expansion stuff.
+ * rndegd.c (my_make_filename): New.
+
+ * : Replaced header util.h by g10lib.h
+
+ * random.c (gather_faked): Replaced make_timestamp by time(2).
+ Disabled warning printed with tty_printf.
+ * rndlinux.c (gather_random): Always use fprintf instead of tty_xxx;
+ this should be replaced by a callback function.
+
+ * primegen.c (gen_prime): Use gcry_mpi_randomize.
+ (is_prime): Ditto.
+ * elgamal.c (test_keys): Ditto.
+ * dsa.c (test_keys): Ditto.
+
+ * cipher.c (gcry_cipher_close): Die on invalid handle.
+
+Mon Nov 15 21:36:02 CET 1999 Werner Koch <wk@gnupg.de>
+
+ * elgamal.c (gen_k): Use the new random API.
+ (generate): Ditto.
+ * dsa.c (gen_k): Ditto.
+ (generate): Ditto.
+
+Sat Nov 13 17:44:23 CET 1999 Werner Koch <wk@gnupg.de>
+
+ * pubkey.c (disable_pubkey_algo): Made static.
+ (gcry_pk_ctl): New.
+
+ * random.c (get_random_bits): Renamed to ...
+ (get_random_bytes): ... this and made static.
+ (gcry_random_bytes): New.
+ (gcry_random_bytes_secure): New.
+ (randomize_buffer): Renamed to ...
+ (gcry_randomize): ...this.
+
+ * md.c (gcry_md_hash_buffer): New.
+
+ * pubkey.c (gcry_pk_algo_info): 4 new commands.
+ (pubkey_get_npkey): Made static.
+ (pubkey_get_nskey): Made static.
+ (pubkey_get_nsig): Made static.
+ (pubkey_get_nenc): Made static.
+
+ * pubkey.c: Removed all G10ERR_xxx.
+ * cipher.c: Changed all GCRYERR_INV_ALGO to GCRYERR_INV_CIPHER_ALGO.
+ * md.c: Changed all GCRYERR_INV_ALGO to GCRYERR_INV_MD_ALGO.
+ * cast5.c (cast_setkey): Changed error codes to GCRYERR_xxx.
+ * blowfish.c: Ditto.
+ * des.c: Ditto.
+ * twofish.c: Ditto.
+ * dsa.c: Ditto.
+ * elgamal.c: Ditto.
+
+ * g10c.c: Removed
+
+ * cipher.c (gcry_cipher_open): Replaced alloc functions and return NULL
+ if we are out of core.
+ * dynload.c: Replaced all memory allocation functions.
+ * md.c: Ditto.
+ * primegen.c: Ditto.
+ * pubkey.c: Ditto.
+ * random.c: Ditto.
+ * rndw32.c: Ditto.
+ * elgamal.c: Ditto.
+ * dsa.c: Ditto.
+
+Tue Oct 26 14:10:21 CEST 1999 Werner Koch <wk@gnupg.de>
+
+ * elgamal.c (sign): Hugh found strange code here. Replaced by BUG().
+
+ * cipher.c: Merged with gcrypt/symapi.c.
+
+ * pubkey.c (string_to_pubkey_algo): Renamed function to ...
+ (gcry_pk_map_name): ... this.
+ (pubkey_algo_to_string): Renamed function to ...
+ (gcry_pk_algo_name): ... this.
+ (gcry_pk_algo_info): New.
+ * pubkey.c: Merged with gcrypt/pkapi.c.
+
+ * md.c (md_reset): Clear finalized; thanks to Ulf Moeller for
+ fixing this bug.
+
+ * md.c: Merged with gcrypt/mdapi.c
+
+Wed Sep 15 14:39:59 CEST 1999 Michael Roth <mroth@nessie.de>
+
+ * des.c: Various speed improvements: one-bit pre-rotation
+ trick after initial permutation (Richard Outerbridge).
+ Finished test of SSLeay Triple-DES patterns.
+
+Wed Sep 15 16:22:17 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndw32.c: New.
+
+Mon Sep 13 10:51:29 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * bithelp.h: New.
+ * rmd160.h, sha1.h, md5.h: Use the rol macro from bithelp.h
+
+Tue Sep 7 16:23:36 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * Makefile.am: Fixed seds for latest egcc. By Ollivier Robert.
+
+Mon Sep 6 19:59:08 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * des.c (selftest): Add some testpattern
+
+Mon Aug 30 20:38:33 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * cipher.c (do_cbc_encrypt): Fixed serious bug occurring when not
+ using in-place encryption. Pointed out by Frank Stajano.
+
+Mon Jul 26 09:34:46 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * md5.c (md5_final): Fix for a SCO cpp bug.
+
+Thu Jul 15 10:15:35 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * elgamal.c (elg_check_secret_key,elg_encrypt,
+ elg_decrypt,elg_sign,elg_verify): Sanity check on the args.
+ * dsa.c (dsa_check_secret_key,dsa_sign,dsa_verify): Ditto.
+
+ * pubkey.c (disable_pubkey_algo): New.
+ (check_pubkey_algo2): Look at disabled algo table.
+ * cipher.c (disable_cipher_algo): New.
+ (check_cipher_algo): Look at disabled algo table.
+
+Wed Jul 7 13:08:40 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * Makefile.am: Support for libtool.
+
+Fri Jul 2 11:45:54 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * dsa.c (gen_k): Changed algorithm to consume less random bytes
+ * elgamal.c (gen_k): Ditto.
+
+ * random.c (random_dump_stats): New.
+
+Thu Jul 1 12:47:31 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * primegen.c, elgamal.c, dsa.c (progress): New and replaced all
+ fputc with a call to this function.
+
+Sat Jun 26 12:15:59 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndegd.c (do_write): s/ssize_t/int/ due to SunOS 4.1 probs.
+
+ * cipher.c (do_cbc_encrypt, do_cbc_decrypt): New.
+
+ * dynload.c (HAVE_DL_SHL_LOAD): Map hpux API to dlopen (Dave Dykstra).
+ * Makefile.am (install-exec-hook): Removed.
+
+Sun May 23 14:20:22 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * cipher.c (setup_cipher_table): Enable Twofish
+
+ * random.c (fast_random_poll): Disable use of times() for mingw32.
+
+Mon May 17 21:54:43 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * dynload.c (register_internal_cipher_extension): Minor init fix.
+
+Tue May 4 15:47:53 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * primegen.c (gen_prime): Readded the Fermat test. Fixed the bug
+ that we didn't correct for step when passing the prime to the
+ Rabin-Miller test which led to bad performance (Stefan Keller).
+ (check_prime): Add a first Fermat test.
+
+Sun Apr 18 10:11:28 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * cipher.c (cipher_setiv): Add ivlen arg, changed all callers.
+
+ * random.c (randomize_buffer): always use secure memory because
+ we can't use m_is_secure() on a statically allocated buffer.
+
+ * twofish.c: Replaced some macros by a loop to reduce text size.
+ * Makefile.am (twofish): No more need for sed editing.
+
+Fri Apr 9 12:26:25 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * cipher.c (cipher_open): Reversed the changes for AUTO_CFB.
+
+ * blowfish.c: Dropped the Blowfish 160 mode.
+ * cipher.c (cipher_open): Ditto.
+ (setup_cipher_table): Ditto. And removed support of twofish128
+
+Wed Apr 7 20:51:39 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * random.c (get_random_bits): Can now handle requests > POOLSIZE
+
+ * cipher.c (cipher_open): Now uses standard CFB for automode if
+ the blocksize is gt 8 (according to rfc2440).
+
+ * twofish.c: Applied Matthew Skala's patches for 256 bit key.
+
+Tue Apr 6 19:58:12 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * random.c (get_random_bits): Can now handle requests > POOLSIZE
+
+ * cipher.c (cipher_open): Now uses standard CFB for automode if
+ the blocksize is gt 8 (according to rfc2440).
+
+Sat Mar 20 11:44:21 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndlinux.c (tty_printf) [IS_MODULE]: Removed.
+
+ * rndegd.c (gather_random): Some fixes.
+
+Wed Mar 17 13:09:03 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndegd.c (do_read): New.
+ (gather_random): Changed the implementation.
+
+Mon Mar 8 20:47:17 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * dynload.c (DLSYM_NEEDS_UNDERSCORE): Renamed.
+
+Fri Feb 26 17:55:41 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * md.c: Nearly a total rewrite.
+
+Wed Feb 24 11:07:27 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * cipher.c (context): Fixed alignment
+ * md.c: Ditto.
+
+ * rndegd.c: New
+
+Mon Feb 22 20:04:00 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndegd.c: New.
+
+Wed Feb 10 17:15:39 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * Makefile.am: Modules are now figured out by configure
+ * construct.c: New. Generated by configure. Changed all modules
+ to work with that.
+ * sha1.h: Removed.
+ * md5.h: Removed.
+
+ * twofish.c: Changed interface to allow Twofish/256
+
+ * rndunix.c (start_gatherer): Die on SIGPIPE.
+
+Wed Jan 20 18:59:49 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndunix.c (gather_random): Fix to avoid infinite loop.
+
+Sun Jan 17 11:04:33 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * des.c (is_weak_key): Replace system memcmp due to bugs
+ in SunOS's memcmp.
+ (des_get_info): Return error on failed selftest.
+ * twofish.c (twofish_setkey): Return error on failed selftest or
+ invalid keylength.
+ * cast5.c (cast_setkey): Ditto.
+ * blowfish.c (bf_setkey): Return error on failed selftest.
+
+Tue Jan 12 11:17:18 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * random.c (random_is_faked): New.
+
+ * tiger.c: Only compile if we have the u64 type
+
+Sat Jan 9 16:02:23 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndunix.c (gather_random): check for setuid.
+
+ * Makefile.am: Add a way to statically link random modules
+
+Thu Jan 7 18:00:58 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * md.c (md_stop_debug): Do a flush first.
+ (md_open): size of buffer now depends on the secure parameter
+
+Sun Jan 3 15:28:44 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
+
+ * rndunix.c (start_gatherer): Fixed stupid ==/= bug
+
+1998-12-31 Geoff Keating <geoffk@ozemail.com.au>
+
+ * des.c (is_weak_key): Rewrite loop end condition.
+
+Tue Dec 29 14:41:47 CET 1998 Werner Koch <wk@isil.d.shuttle.de>
+
+ * random.c: add unistd.h for getpid().
+ (RAND_MAX): Fallback value for Sun.
+
+Wed Dec 23 17:12:24 CET 1998 Werner Koch <wk@isil.d.shuttle.de>
+
+ * md.c (md_copy): Reset debug.
+
+Mon Dec 14 21:18:49 CET 1998 Werner Koch <wk@isil.d.shuttle.de>
+
+ * random.c (read_random_source): Changed the interface to the
+ random gathering function.
+ (gather_faked): Use new interface.
+ * dynload.c (dynload_getfnc_fast_random_poll): Ditto.
+ (dynload_getfnc_gather_random): Ditto.
+ * rndlinux.c (gather_random): Ditto.
+ * rndunix.c (gather_random): Ditto.
+
+Sat Dec 12 18:40:32 CET 1998 Werner Koch <wk@isil.d.shuttle.de>
+
+ * dynload.c (SYMBOL_VERSION): New to cope with system which needs
+ underscores.
+
+ * rndunix.c: Rewrote large parts
+
+Thu Dec 10 20:15:36 CET 1998 Werner Koch <wk@isil.d.shuttle.de>
+
+ * dynload.c (load_extension): increased needed verbosity level.
+
+ * random.c (fast_random_poll): Fallback to a default fast random
+ poll function.
+ (read_random_source): Always use the faked entropy gatherer if no
+ gather module is available.
+ * rndlinux.c (fast_poll): Removed.
+ * rndunix.c (fast_poll): Removed.
+
+
+Wed Nov 25 12:33:41 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-*.c: Removed.
+ * rndlinux.c : New.
+ * rndunix.c : New.
+ * random.c : Restructured the interface to the gather modules.
+ (initialize): Call constructor functions.
+ (read_random_source): Moved to here.
+ * dynload.c (dynload_getfnc_gather_random): New.
+ (dynload_getfnc_fast_random_poll): New.
+ (register_internal_cipher_extension): New.
+ (register_cipher_extension): Support of internal modules.
+
+Sun Nov 8 17:44:36 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-unix.c (read_random_source): Removed the assert.
+
+Mon Oct 19 18:34:30 1998 me,,, (wk@tobold)
+
+ * pubkey.c: Hack to allow us to give some info about RSA keys back.
+
+Thu Oct 15 11:47:57 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * dynload.c: Support for DLD
+
+Wed Oct 14 12:13:07 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-unix.c: Now uses names from configure for /dev/random.
+
+1998-10-10 SL Baur <steve@altair.xemacs.org>
+
+ * Makefile.am: fix sed -O substitutions to catch -O6, etc.
+
+Tue Oct 6 10:06:32 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-unix.c (HAVE_GETTIMEOFDAY): Fixed (was ..GETTIMEOFTIME :-)
+ * rand-dummy.c (HAVE_GETTIMEOFDAY): Ditto.
+
+Mon Sep 28 13:23:09 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_digest): New.
+ (md_reset): New.
+
+Wed Sep 23 12:27:02 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * tiger.c (TIGER_CONTEXT): moved "buf", so that it is 64 bit aligned.
+
+Mon Sep 21 06:22:53 1998 Werner Koch (wk@(none))
+
+ * des.c: Some patches from Michael.
+
+Thu Sep 17 19:00:06 1998 Werner Koch (wk@(none))
+
+ * des.c : New file from Michael Roth <mroth@nessie.de>
+
+Mon Sep 14 11:10:55 1998 Werner Koch (wk@(none))
+
+ * blowfish.c (bf_setkey): Niklas Hernaeus patch to detect weak keys.
+
+Mon Sep 14 09:19:25 1998 Werner Koch (wk@(none))
+
+ * dynload.c (RTLD_NOW): Now defined to 1 if it is undefined.
+
+Mon Sep 7 17:04:33 1998 Werner Koch (wk@(none))
+
+ * Makefile.am: Fixes to allow a different build directory
+
+Thu Aug 6 17:25:38 1998 Werner Koch,mobil,,, (wk@tobold)
+
+ * random.c (get_random_byte): Removed and changed all callers
+ to use get_random_bits()
+
+Mon Jul 27 10:30:22 1998 Werner Koch (wk@(none))
+
+ * cipher.c : Support for other blocksizes
+ (cipher_get_blocksize): New.
+ * twofish.c: New.
+ * Makefile.am: Add twofish module.
+
+Mon Jul 13 21:30:52 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (read_pool): Simple alloc if secure_alloc is not set.
+ (get_random_bits): Ditto.
+
+Thu Jul 9 13:01:14 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * dynload.c (load_extension): Function now bails out if
+ the program is run setuid.
+
+Wed Jul 8 18:58:23 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rmd160.c (rmd160_hash_buffer): New.
+
+Thu Jul 2 10:50:30 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c (cipher_open): algos >=100 use standard CFB
+
+Thu Jun 25 11:18:25 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * Makefile.am: Support for extensions
+
+Thu Jun 18 12:09:38 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (mix_pool): simpler handling for level 0
+
+Mon Jun 15 14:40:48 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * tiger.c: Removed from dist, will reappear as dynload module
+
+Sat Jun 13 14:16:57 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * pubkey.c: Major changes to allow extensions. Changed the interface
+ of all public key ciphers and added the ability to load extensions
+ on demand.
+
+ * misc.c: Removed.
+
+Wed Jun 10 07:52:08 1998 Werner Koch,mobil,,, (wk@tobold)
+
+ * dynload.c: New.
+ * cipher.c: Major changes to allow extensions.
+
+Mon Jun 8 22:43:00 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c: Major internal changes to support extensions.
+ * blowfish.c (blowfish_get_info): New and made all internal
+ functions static, changed header.
+ * cast5.c (cast5_get_info): Likewise.
+
+Mon Jun 8 12:27:52 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * tiger.c (transform): Fix for big endian
+
+ * cipher.c (do_cfb_decrypt): Big endian fix.
+
+Fri May 22 07:30:39 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_get_oid): Add a new one for TIGER.
+
+Thu May 21 13:24:52 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c: Add support for a dummy cipher
+
+Thu May 14 15:40:36 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rmd160.c (transform): fixed sigbus - I should better
+ add Christian von Roques's new implementation of rmd160_write.
+
+Fri May 8 18:07:44 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-internal.h, rand-unix.c, rand-w32.c, rand_dummy.c: New
+ * random.c: Moved system specific functions to rand-****.c
+
+Fri May 8 14:01:17 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (fast_random_poll): add call to gethrtime.
+
+Tue May 5 21:28:55 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * elgamal.c (elg_generate): choosing x was not correct, could
+ yield 6 bytes which are not from the random pool, tsss, tsss..
+
+Tue May 5 14:09:06 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * primegen.c (generate_elg_prime): Add arg mode, changed all
+ callers and implemented mode 1.
+
+Mon Apr 27 14:41:58 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c (cipher_get_keylen): New.
+
+Sun Apr 26 14:44:52 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * tiger.c, tiger.h: New.
+
+Wed Apr 8 14:57:11 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * misc.c (check_pubkey_algo2): New.
+
+Tue Apr 7 18:46:49 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c: New
+ * misc.c (check_cipher_algo): Moved to cipher.c
+ * cast5.c: Moved many functions to cipher.c
+ * blowfish.c: Likewise.
+
+Sat Apr 4 19:52:08 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cast5.c: Implemented and tested.
+
+Wed Apr 1 16:38:27 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * elgamal.c (elg_generate): Faster generation of x in some cases.
+
+Thu Mar 19 13:54:48 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * blowfish.c (blowfish_decode_cfb): changed XOR operation
+ (blowfish_encode_cfb): Ditto.
+
+Thu Mar 12 14:04:05 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * sha1.c (transform): Rewrote
+
+ * blowfish.c (encrypt): Unrolled for rounds == 16
+ (decrypt): Ditto.
+
+Tue Mar 10 16:32:08 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rmd160.c (transform): Unrolled the loop.
+
+Tue Mar 10 13:05:14 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (read_pool): Add pool_balance stuff.
+ (get_random_bits): New.
+
+ * elgamal.c (elg_generate): Now uses get_random_bits to generate x.
+
+
+Tue Mar 10 11:33:51 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_digest_length): New.
+
+Tue Mar 10 11:27:41 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * dsa.c (dsa_verify): Works.
+
+Mon Mar 9 12:59:08 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * dsa.c, dsa.h: Removed some unused code.
+
+Wed Mar 4 10:39:22 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_open): Add call to fast_random_poll.
+ blowfish.c (blowfish_setkey): Ditto.
+
+Tue Mar 3 13:32:54 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rmd160.c (rmd160_mixblock): New.
+ * random.c: Restructured to start with a new RNG implementation.
+ * random.h: New.
+
+Mon Mar 2 19:21:46 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * gost.c, gost.h: Removed because they did only contain trash.
+
+Sun Mar 1 16:42:29 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (fill_buffer): removed error message if n == -1.
+
+Fri Feb 27 16:39:34 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_enable): No init if called twice.
+
+Thu Feb 26 07:57:02 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * primegen.c (generate_elg_prime): Changed the progress printing.
+ (gen_prime): Ditto.
+
+Tue Feb 24 12:28:42 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md5.c, md.5 : Replaced by a modified version of md5.c from
+ GNU textutils 1.22.
+
+Wed Feb 18 14:08:30 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c, md.h : New debugging support
+
+Mon Feb 16 10:08:47 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * misc.c (cipher_algo_to_string): New
+ (pubkey_algo_to_string): New.
+ (digest_algo_to_string): New.
+
+
+ Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+ 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+ This file is free software; as a special exception the author gives
+ unlimited permission to copy and/or distribute it, with or without
+ modifications, as long as this notice is preserved.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
+ implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+Local Variables:
+buffer-read-only: t
+End:
diff --git a/comm/third_party/libgcrypt/cipher/Makefile.am b/comm/third_party/libgcrypt/cipher/Makefile.am
new file mode 100644
index 0000000000..d644005634
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/Makefile.am
@@ -0,0 +1,258 @@
+# Makefile for cipher modules
+# Copyright (C) 1998, 1999, 2000, 2001, 2002,
+# 2003, 2009 Free Software Foundation, Inc.
+#
+# This file is part of Libgcrypt.
+#
+# Libgcrypt is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation; either version 2.1 of
+# the License, or (at your option) any later version.
+#
+# Libgcrypt is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+# Process this file with automake to produce Makefile.in
+
+# Need to include ../src in addition to top_srcdir because gcrypt.h is
+# a built header.
+AM_CPPFLAGS = -I../src -I$(top_srcdir)/src -I../mpi -I$(top_srcdir)/mpi
+AM_CFLAGS = $(GPG_ERROR_CFLAGS)
+
+AM_CCASFLAGS = $(NOEXECSTACK_FLAGS)
+
+EXTRA_DIST = gost-s-box.c
+
+CLEANFILES = gost-s-box
+DISTCLEANFILES = gost-sb.h
+
+noinst_LTLIBRARIES = libcipher.la
+
+GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \
+ @GCRYPT_DIGESTS@ @GCRYPT_KDFS@
+
+libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES)
+libcipher_la_LIBADD = $(GCRYPT_MODULES)
+
+libcipher_la_SOURCES = \
+ cipher.c cipher-internal.h \
+ cipher-cbc.c \
+ cipher-cfb.c \
+ cipher-ofb.c \
+ cipher-ctr.c \
+ cipher-aeswrap.c \
+ cipher-ccm.c \
+ cipher-cmac.c \
+ cipher-gcm.c cipher-gcm-intel-pclmul.c cipher-gcm-armv7-neon.S \
+ cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \
+ cipher-poly1305.c \
+ cipher-ocb.c \
+ cipher-xts.c \
+ cipher-eax.c \
+ cipher-selftest.c cipher-selftest.h \
+ pubkey.c pubkey-internal.h pubkey-util.c \
+ md.c \
+ mac.c mac-internal.h \
+ mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \
+ poly1305.c poly1305-internal.h \
+ poly1305-s390x.S \
+ kdf.c kdf-internal.h \
+ bithelp.h \
+ bufhelp.h \
+ primegen.c \
+ hash-common.c hash-common.h \
+ dsa-common.c rsa-common.c \
+ sha1.h
+
+EXTRA_libcipher_la_SOURCES = \
+ asm-common-aarch64.h \
+ asm-common-amd64.h \
+ asm-common-s390x.h \
+ asm-inline-s390x.h \
+ asm-poly1305-aarch64.h \
+ asm-poly1305-amd64.h \
+ asm-poly1305-s390x.h \
+ arcfour.c arcfour-amd64.S \
+ blowfish.c blowfish-amd64.S blowfish-arm.S \
+ cast5.c cast5-amd64.S cast5-arm.S \
+ chacha20.c chacha20-amd64-ssse3.S chacha20-amd64-avx2.S \
+ chacha20-armv7-neon.S chacha20-aarch64.S \
+ chacha20-ppc.c chacha20-s390x.S \
+ crc.c crc-intel-pclmul.c crc-armv8-ce.c \
+ crc-armv8-aarch64-ce.S \
+ crc-ppc.c \
+ des.c des-amd64.S \
+ dsa.c \
+ elgamal.c \
+ ecc.c ecc-curves.c ecc-misc.c ecc-common.h \
+ ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c ecc-sm2.c \
+ idea.c \
+ gost28147.c gost.h \
+ gostr3411-94.c \
+ md4.c \
+ md5.c \
+ rijndael.c rijndael-internal.h rijndael-tables.h \
+ rijndael-aesni.c rijndael-padlock.c \
+ rijndael-amd64.S rijndael-arm.S \
+ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \
+ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \
+ rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \
+ rijndael-ppc.c rijndael-ppc9le.c \
+ rijndael-ppc-common.h rijndael-ppc-functions.h \
+ rijndael-s390x.c \
+ rmd160.c \
+ rsa.c \
+ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \
+ scrypt.c \
+ seed.c \
+ serpent.c serpent-sse2-amd64.S \
+ sm4.c sm4-aesni-avx-amd64.S sm4-aesni-avx2-amd64.S \
+ serpent-avx2-amd64.S serpent-armv7-neon.S \
+ sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \
+ sha1-avx2-bmi2-amd64.S sha1-armv7-neon.S sha1-armv8-aarch32-ce.S \
+ sha1-armv8-aarch64-ce.S sha1-intel-shaext.c \
+ sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S \
+ sha256-avx2-bmi2-amd64.S \
+ sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \
+ sha256-intel-shaext.c sha256-ppc.c \
+ sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S \
+ sha512-avx2-bmi2-amd64.S \
+ sha512-armv7-neon.S sha512-arm.S \
+ sha512-ppc.c sha512-ssse3-i386.c \
+ sm3.c \
+ keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \
+ stribog.c \
+ tiger.c \
+ whirlpool.c whirlpool-sse2-amd64.S \
+ twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \
+ twofish-avx2-amd64.S \
+ rfc2268.c \
+ camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \
+ camellia-aesni-avx2-amd64.S camellia-arm.S camellia-aarch64.S \
+ blake2.c \
+ blake2b-amd64-avx2.S blake2s-amd64-avx.S
+
+gost28147.lo: gost-sb.h
+gost-sb.h: gost-s-box
+ ./gost-s-box $@
+
+gost-s-box: gost-s-box.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(LDFLAGS_FOR_BUILD) \
+ $(CPPFLAGS_FOR_BUILD) -o $@ $(srcdir)/gost-s-box.c
+
+
+if ENABLE_O_FLAG_MUNGING
+o_flag_munging = sed -e 's/-O\([2-9sg][2-9sg]*\)/-O1/' -e 's/-Ofast/-O1/g'
+else
+o_flag_munging = cat
+endif
+
+
+# We need to lower the optimization for this module.
+tiger.o: $(srcdir)/tiger.c Makefile
+ `echo $(COMPILE) -c $< | $(o_flag_munging) `
+
+tiger.lo: $(srcdir)/tiger.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(o_flag_munging) `
+
+
+# We need to disable instrumentation for these modules as they use cc as
+# thin assembly front-end and do not tolerate in-between function calls
+# inserted by compiler as those functions may clobber the XMM registers.
+if ENABLE_INSTRUMENTATION_MUNGING
+instrumentation_munging = sed \
+ -e 's/-fsanitize[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \
+ -e 's/-fprofile[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \
+ -e 's/-fcoverage[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g'
+else
+instrumentation_munging = cat
+endif
+
+rijndael-aesni.o: $(srcdir)/rijndael-aesni.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+rijndael-aesni.lo: $(srcdir)/rijndael-aesni.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+rijndael-ssse3-amd64.o: $(srcdir)/rijndael-ssse3-amd64.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+rijndael-ssse3-amd64.lo: $(srcdir)/rijndael-ssse3-amd64.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+cipher-gcm-intel-pclmul.o: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+cipher-gcm-intel-pclmul.lo: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+sha1-intel-shaext.o: $(srcdir)/sha1-intel-shaext.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+sha1-intel-shaext.lo: $(srcdir)/sha1-intel-shaext.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+sha256-intel-shaext.o: $(srcdir)/sha256-intel-shaext.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+sha256-intel-shaext.lo: $(srcdir)/sha256-intel-shaext.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+sha256-ssse3-i386.o: $(srcdir)/sha256-ssse3-i386.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+sha256-ssse3-i386.lo: $(srcdir)/sha256-ssse3-i386.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+crc-intel-pclmul.o: $(srcdir)/crc-intel-pclmul.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+crc-intel-pclmul.lo: $(srcdir)/crc-intel-pclmul.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+if ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS
+ppc_vcrypto_cflags = -maltivec -mvsx -mcrypto
+else
+ppc_vcrypto_cflags =
+endif
+
+rijndael-ppc.o: $(srcdir)/rijndael-ppc.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+rijndael-ppc.lo: $(srcdir)/rijndael-ppc.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+rijndael-ppc9le.o: $(srcdir)/rijndael-ppc9le.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+rijndael-ppc9le.lo: $(srcdir)/rijndael-ppc9le.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha256-ppc.o: $(srcdir)/sha256-ppc.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha256-ppc.lo: $(srcdir)/sha256-ppc.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha512-ppc.o: $(srcdir)/sha512-ppc.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha512-ppc.lo: $(srcdir)/sha512-ppc.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+chacha20-ppc.o: $(srcdir)/chacha20-ppc.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+chacha20-ppc.lo: $(srcdir)/chacha20-ppc.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+crc-ppc.o: $(srcdir)/crc-ppc.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+crc-ppc.lo: $(srcdir)/crc-ppc.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
diff --git a/comm/third_party/libgcrypt/cipher/Makefile.in b/comm/third_party/libgcrypt/cipher/Makefile.in
new file mode 100644
index 0000000000..ceba51b45a
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/Makefile.in
@@ -0,0 +1,1445 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Makefile for cipher modules
+# Copyright (C) 1998, 1999, 2000, 2001, 2002,
+# 2003, 2009 Free Software Foundation, Inc.
+#
+# This file is part of Libgcrypt.
+#
+# Libgcrypt is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation; either version 2.1 of
+# the License, or (at your option) any later version.
+#
+# Libgcrypt is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+# Process this file with automake to produce Makefile.in
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = cipher
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_cc_for_build.m4 \
+ $(top_srcdir)/m4/gpg-error.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/m4/noexecstack.m4 $(top_srcdir)/m4/socklen.m4 \
+ $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+am__DEPENDENCIES_1 =
+am_libcipher_la_OBJECTS = cipher.lo cipher-cbc.lo cipher-cfb.lo \
+ cipher-ofb.lo cipher-ctr.lo cipher-aeswrap.lo cipher-ccm.lo \
+ cipher-cmac.lo cipher-gcm.lo cipher-gcm-intel-pclmul.lo \
+ cipher-gcm-armv7-neon.lo cipher-gcm-armv8-aarch32-ce.lo \
+ cipher-gcm-armv8-aarch64-ce.lo cipher-poly1305.lo \
+ cipher-ocb.lo cipher-xts.lo cipher-eax.lo cipher-selftest.lo \
+ pubkey.lo pubkey-util.lo md.lo mac.lo mac-hmac.lo mac-cmac.lo \
+ mac-gmac.lo mac-poly1305.lo poly1305.lo poly1305-s390x.lo \
+ kdf.lo primegen.lo hash-common.lo dsa-common.lo rsa-common.lo
+libcipher_la_OBJECTS = $(am_libcipher_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp
+am__maybe_remake_depfiles = depfiles
+am__depfiles_remade = ./$(DEPDIR)/arcfour-amd64.Plo \
+ ./$(DEPDIR)/arcfour.Plo ./$(DEPDIR)/blake2.Plo \
+ ./$(DEPDIR)/blake2b-amd64-avx2.Plo \
+ ./$(DEPDIR)/blake2s-amd64-avx.Plo \
+ ./$(DEPDIR)/blowfish-amd64.Plo ./$(DEPDIR)/blowfish-arm.Plo \
+ ./$(DEPDIR)/blowfish.Plo ./$(DEPDIR)/camellia-aarch64.Plo \
+ ./$(DEPDIR)/camellia-aesni-avx-amd64.Plo \
+ ./$(DEPDIR)/camellia-aesni-avx2-amd64.Plo \
+ ./$(DEPDIR)/camellia-arm.Plo ./$(DEPDIR)/camellia-glue.Plo \
+ ./$(DEPDIR)/camellia.Plo ./$(DEPDIR)/cast5-amd64.Plo \
+ ./$(DEPDIR)/cast5-arm.Plo ./$(DEPDIR)/cast5.Plo \
+ ./$(DEPDIR)/chacha20-aarch64.Plo \
+ ./$(DEPDIR)/chacha20-amd64-avx2.Plo \
+ ./$(DEPDIR)/chacha20-amd64-ssse3.Plo \
+ ./$(DEPDIR)/chacha20-armv7-neon.Plo \
+ ./$(DEPDIR)/chacha20-ppc.Plo ./$(DEPDIR)/chacha20-s390x.Plo \
+ ./$(DEPDIR)/chacha20.Plo ./$(DEPDIR)/cipher-aeswrap.Plo \
+ ./$(DEPDIR)/cipher-cbc.Plo ./$(DEPDIR)/cipher-ccm.Plo \
+ ./$(DEPDIR)/cipher-cfb.Plo ./$(DEPDIR)/cipher-cmac.Plo \
+ ./$(DEPDIR)/cipher-ctr.Plo ./$(DEPDIR)/cipher-eax.Plo \
+ ./$(DEPDIR)/cipher-gcm-armv7-neon.Plo \
+ ./$(DEPDIR)/cipher-gcm-armv8-aarch32-ce.Plo \
+ ./$(DEPDIR)/cipher-gcm-armv8-aarch64-ce.Plo \
+ ./$(DEPDIR)/cipher-gcm-intel-pclmul.Plo \
+ ./$(DEPDIR)/cipher-gcm.Plo ./$(DEPDIR)/cipher-ocb.Plo \
+ ./$(DEPDIR)/cipher-ofb.Plo ./$(DEPDIR)/cipher-poly1305.Plo \
+ ./$(DEPDIR)/cipher-selftest.Plo ./$(DEPDIR)/cipher-xts.Plo \
+ ./$(DEPDIR)/cipher.Plo ./$(DEPDIR)/crc-armv8-aarch64-ce.Plo \
+ ./$(DEPDIR)/crc-armv8-ce.Plo ./$(DEPDIR)/crc-intel-pclmul.Plo \
+ ./$(DEPDIR)/crc-ppc.Plo ./$(DEPDIR)/crc.Plo \
+ ./$(DEPDIR)/des-amd64.Plo ./$(DEPDIR)/des.Plo \
+ ./$(DEPDIR)/dsa-common.Plo ./$(DEPDIR)/dsa.Plo \
+ ./$(DEPDIR)/ecc-curves.Plo ./$(DEPDIR)/ecc-ecdh.Plo \
+ ./$(DEPDIR)/ecc-ecdsa.Plo ./$(DEPDIR)/ecc-eddsa.Plo \
+ ./$(DEPDIR)/ecc-gost.Plo ./$(DEPDIR)/ecc-misc.Plo \
+ ./$(DEPDIR)/ecc-sm2.Plo ./$(DEPDIR)/ecc.Plo \
+ ./$(DEPDIR)/elgamal.Plo ./$(DEPDIR)/gost28147.Plo \
+ ./$(DEPDIR)/gostr3411-94.Plo ./$(DEPDIR)/hash-common.Plo \
+ ./$(DEPDIR)/idea.Plo ./$(DEPDIR)/kdf.Plo \
+ ./$(DEPDIR)/keccak-armv7-neon.Plo ./$(DEPDIR)/keccak.Plo \
+ ./$(DEPDIR)/mac-cmac.Plo ./$(DEPDIR)/mac-gmac.Plo \
+ ./$(DEPDIR)/mac-hmac.Plo ./$(DEPDIR)/mac-poly1305.Plo \
+ ./$(DEPDIR)/mac.Plo ./$(DEPDIR)/md.Plo ./$(DEPDIR)/md4.Plo \
+ ./$(DEPDIR)/md5.Plo ./$(DEPDIR)/poly1305-s390x.Plo \
+ ./$(DEPDIR)/poly1305.Plo ./$(DEPDIR)/primegen.Plo \
+ ./$(DEPDIR)/pubkey-util.Plo ./$(DEPDIR)/pubkey.Plo \
+ ./$(DEPDIR)/rfc2268.Plo ./$(DEPDIR)/rijndael-aarch64.Plo \
+ ./$(DEPDIR)/rijndael-aesni.Plo ./$(DEPDIR)/rijndael-amd64.Plo \
+ ./$(DEPDIR)/rijndael-arm.Plo \
+ ./$(DEPDIR)/rijndael-armv8-aarch32-ce.Plo \
+ ./$(DEPDIR)/rijndael-armv8-aarch64-ce.Plo \
+ ./$(DEPDIR)/rijndael-armv8-ce.Plo \
+ ./$(DEPDIR)/rijndael-padlock.Plo ./$(DEPDIR)/rijndael-ppc.Plo \
+ ./$(DEPDIR)/rijndael-ppc9le.Plo ./$(DEPDIR)/rijndael-s390x.Plo \
+ ./$(DEPDIR)/rijndael-ssse3-amd64-asm.Plo \
+ ./$(DEPDIR)/rijndael-ssse3-amd64.Plo ./$(DEPDIR)/rijndael.Plo \
+ ./$(DEPDIR)/rmd160.Plo ./$(DEPDIR)/rsa-common.Plo \
+ ./$(DEPDIR)/rsa.Plo ./$(DEPDIR)/salsa20-amd64.Plo \
+ ./$(DEPDIR)/salsa20-armv7-neon.Plo ./$(DEPDIR)/salsa20.Plo \
+ ./$(DEPDIR)/scrypt.Plo ./$(DEPDIR)/seed.Plo \
+ ./$(DEPDIR)/serpent-armv7-neon.Plo \
+ ./$(DEPDIR)/serpent-avx2-amd64.Plo \
+ ./$(DEPDIR)/serpent-sse2-amd64.Plo ./$(DEPDIR)/serpent.Plo \
+ ./$(DEPDIR)/sha1-armv7-neon.Plo \
+ ./$(DEPDIR)/sha1-armv8-aarch32-ce.Plo \
+ ./$(DEPDIR)/sha1-armv8-aarch64-ce.Plo \
+ ./$(DEPDIR)/sha1-avx-amd64.Plo \
+ ./$(DEPDIR)/sha1-avx-bmi2-amd64.Plo \
+ ./$(DEPDIR)/sha1-avx2-bmi2-amd64.Plo \
+ ./$(DEPDIR)/sha1-intel-shaext.Plo \
+ ./$(DEPDIR)/sha1-ssse3-amd64.Plo ./$(DEPDIR)/sha1.Plo \
+ ./$(DEPDIR)/sha256-armv8-aarch32-ce.Plo \
+ ./$(DEPDIR)/sha256-armv8-aarch64-ce.Plo \
+ ./$(DEPDIR)/sha256-avx-amd64.Plo \
+ ./$(DEPDIR)/sha256-avx2-bmi2-amd64.Plo \
+ ./$(DEPDIR)/sha256-intel-shaext.Plo ./$(DEPDIR)/sha256-ppc.Plo \
+ ./$(DEPDIR)/sha256-ssse3-amd64.Plo ./$(DEPDIR)/sha256.Plo \
+ ./$(DEPDIR)/sha512-arm.Plo ./$(DEPDIR)/sha512-armv7-neon.Plo \
+ ./$(DEPDIR)/sha512-avx-amd64.Plo \
+ ./$(DEPDIR)/sha512-avx2-bmi2-amd64.Plo \
+ ./$(DEPDIR)/sha512-ppc.Plo ./$(DEPDIR)/sha512-ssse3-amd64.Plo \
+ ./$(DEPDIR)/sha512-ssse3-i386.Plo ./$(DEPDIR)/sha512.Plo \
+ ./$(DEPDIR)/sm3.Plo ./$(DEPDIR)/sm4-aesni-avx-amd64.Plo \
+ ./$(DEPDIR)/sm4-aesni-avx2-amd64.Plo ./$(DEPDIR)/sm4.Plo \
+ ./$(DEPDIR)/stribog.Plo ./$(DEPDIR)/tiger.Plo \
+ ./$(DEPDIR)/twofish-aarch64.Plo ./$(DEPDIR)/twofish-amd64.Plo \
+ ./$(DEPDIR)/twofish-arm.Plo ./$(DEPDIR)/twofish-avx2-amd64.Plo \
+ ./$(DEPDIR)/twofish.Plo ./$(DEPDIR)/whirlpool-sse2-amd64.Plo \
+ ./$(DEPDIR)/whirlpool.Plo
+am__mv = mv -f
+CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS)
+LTCPPASCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CCASFLAGS) $(CCASFLAGS)
+AM_V_CPPAS = $(am__v_CPPAS_@AM_V@)
+am__v_CPPAS_ = $(am__v_CPPAS_@AM_DEFAULT_V@)
+am__v_CPPAS_0 = @echo " CPPAS " $@;
+am__v_CPPAS_1 =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(libcipher_la_SOURCES) $(EXTRA_libcipher_la_SOURCES)
+DIST_SOURCES = $(libcipher_la_SOURCES) $(EXTRA_libcipher_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in \
+ $(top_srcdir)/build-aux/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BUILD_FILEVERSION = @BUILD_FILEVERSION@
+BUILD_REVISION = @BUILD_REVISION@
+BUILD_TIMESTAMP = @BUILD_TIMESTAMP@
+BUILD_VERSION = @BUILD_VERSION@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DL_LIBS = @DL_LIBS@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FALLBACK_SOCKLEN_T = @FALLBACK_SOCKLEN_T@
+FGREP = @FGREP@
+GCRYPT_CIPHERS = @GCRYPT_CIPHERS@
+GCRYPT_DIGESTS = @GCRYPT_DIGESTS@
+GCRYPT_HWF_MODULES = @GCRYPT_HWF_MODULES@
+GCRYPT_KDFS = @GCRYPT_KDFS@
+GCRYPT_PUBKEY_CIPHERS = @GCRYPT_PUBKEY_CIPHERS@
+GCRYPT_RANDOM = @GCRYPT_RANDOM@
+GPGRT_CONFIG = @GPGRT_CONFIG@
+GPG_ERROR_CFLAGS = @GPG_ERROR_CFLAGS@
+GPG_ERROR_CONFIG = @GPG_ERROR_CONFIG@
+GPG_ERROR_LIBS = @GPG_ERROR_LIBS@
+GPG_ERROR_MT_CFLAGS = @GPG_ERROR_MT_CFLAGS@
+GPG_ERROR_MT_LIBS = @GPG_ERROR_MT_LIBS@
+GREP = @GREP@
+INSERT_SYS_SELECT_H = @INSERT_SYS_SELECT_H@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDADD_FOR_TESTS_KLUDGE = @LDADD_FOR_TESTS_KLUDGE@
+LDFLAGS = @LDFLAGS@
+LIBGCRYPT_CIPHERS = @LIBGCRYPT_CIPHERS@
+LIBGCRYPT_CONFIG_API_VERSION = @LIBGCRYPT_CONFIG_API_VERSION@
+LIBGCRYPT_CONFIG_CFLAGS = @LIBGCRYPT_CONFIG_CFLAGS@
+LIBGCRYPT_CONFIG_HOST = @LIBGCRYPT_CONFIG_HOST@
+LIBGCRYPT_CONFIG_LIBS = @LIBGCRYPT_CONFIG_LIBS@
+LIBGCRYPT_DIGESTS = @LIBGCRYPT_DIGESTS@
+LIBGCRYPT_LT_AGE = @LIBGCRYPT_LT_AGE@
+LIBGCRYPT_LT_CURRENT = @LIBGCRYPT_LT_CURRENT@
+LIBGCRYPT_LT_REVISION = @LIBGCRYPT_LT_REVISION@
+LIBGCRYPT_PUBKEY_CIPHERS = @LIBGCRYPT_PUBKEY_CIPHERS@
+LIBGCRYPT_THREAD_MODULES = @LIBGCRYPT_THREAD_MODULES@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MPI_SFLAGS = @MPI_SFLAGS@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NOEXECSTACK_FLAGS = @NOEXECSTACK_FLAGS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTH_CFLAGS = @PTH_CFLAGS@
+PTH_CONFIG = @PTH_CONFIG@
+PTH_LIBS = @PTH_LIBS@
+RANLIB = @RANLIB@
+RC = @RC@
+RUN_LARGE_DATA_TESTS = @RUN_LARGE_DATA_TESTS@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+SYSROOT = @SYSROOT@
+VERSION = @VERSION@
+VERSION_NUMBER = @VERSION_NUMBER@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+emacs_local_vars_begin = @emacs_local_vars_begin@
+emacs_local_vars_end = @emacs_local_vars_end@
+emacs_local_vars_read_only = @emacs_local_vars_read_only@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Need to include ../src in addition to top_srcdir because gcrypt.h is
+# a built header.
+AM_CPPFLAGS = -I../src -I$(top_srcdir)/src -I../mpi -I$(top_srcdir)/mpi
+AM_CFLAGS = $(GPG_ERROR_CFLAGS)
+AM_CCASFLAGS = $(NOEXECSTACK_FLAGS)
+EXTRA_DIST = gost-s-box.c
+CLEANFILES = gost-s-box
+DISTCLEANFILES = gost-sb.h
+noinst_LTLIBRARIES = libcipher.la
+GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \
+ @GCRYPT_DIGESTS@ @GCRYPT_KDFS@
+
+libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES)
+libcipher_la_LIBADD = $(GCRYPT_MODULES)
+libcipher_la_SOURCES = \
+ cipher.c cipher-internal.h \
+ cipher-cbc.c \
+ cipher-cfb.c \
+ cipher-ofb.c \
+ cipher-ctr.c \
+ cipher-aeswrap.c \
+ cipher-ccm.c \
+ cipher-cmac.c \
+ cipher-gcm.c cipher-gcm-intel-pclmul.c cipher-gcm-armv7-neon.S \
+ cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \
+ cipher-poly1305.c \
+ cipher-ocb.c \
+ cipher-xts.c \
+ cipher-eax.c \
+ cipher-selftest.c cipher-selftest.h \
+ pubkey.c pubkey-internal.h pubkey-util.c \
+ md.c \
+ mac.c mac-internal.h \
+ mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \
+ poly1305.c poly1305-internal.h \
+ poly1305-s390x.S \
+ kdf.c kdf-internal.h \
+ bithelp.h \
+ bufhelp.h \
+ primegen.c \
+ hash-common.c hash-common.h \
+ dsa-common.c rsa-common.c \
+ sha1.h
+
+EXTRA_libcipher_la_SOURCES = \
+ asm-common-aarch64.h \
+ asm-common-amd64.h \
+ asm-common-s390x.h \
+ asm-inline-s390x.h \
+ asm-poly1305-aarch64.h \
+ asm-poly1305-amd64.h \
+ asm-poly1305-s390x.h \
+ arcfour.c arcfour-amd64.S \
+ blowfish.c blowfish-amd64.S blowfish-arm.S \
+ cast5.c cast5-amd64.S cast5-arm.S \
+ chacha20.c chacha20-amd64-ssse3.S chacha20-amd64-avx2.S \
+ chacha20-armv7-neon.S chacha20-aarch64.S \
+ chacha20-ppc.c chacha20-s390x.S \
+ crc.c crc-intel-pclmul.c crc-armv8-ce.c \
+ crc-armv8-aarch64-ce.S \
+ crc-ppc.c \
+ des.c des-amd64.S \
+ dsa.c \
+ elgamal.c \
+ ecc.c ecc-curves.c ecc-misc.c ecc-common.h \
+ ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c ecc-sm2.c \
+ idea.c \
+ gost28147.c gost.h \
+ gostr3411-94.c \
+ md4.c \
+ md5.c \
+ rijndael.c rijndael-internal.h rijndael-tables.h \
+ rijndael-aesni.c rijndael-padlock.c \
+ rijndael-amd64.S rijndael-arm.S \
+ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \
+ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \
+ rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \
+ rijndael-ppc.c rijndael-ppc9le.c \
+ rijndael-ppc-common.h rijndael-ppc-functions.h \
+ rijndael-s390x.c \
+ rmd160.c \
+ rsa.c \
+ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \
+ scrypt.c \
+ seed.c \
+ serpent.c serpent-sse2-amd64.S \
+ sm4.c sm4-aesni-avx-amd64.S sm4-aesni-avx2-amd64.S \
+ serpent-avx2-amd64.S serpent-armv7-neon.S \
+ sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \
+ sha1-avx2-bmi2-amd64.S sha1-armv7-neon.S sha1-armv8-aarch32-ce.S \
+ sha1-armv8-aarch64-ce.S sha1-intel-shaext.c \
+ sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S \
+ sha256-avx2-bmi2-amd64.S \
+ sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \
+ sha256-intel-shaext.c sha256-ppc.c \
+ sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S \
+ sha512-avx2-bmi2-amd64.S \
+ sha512-armv7-neon.S sha512-arm.S \
+ sha512-ppc.c sha512-ssse3-i386.c \
+ sm3.c \
+ keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \
+ stribog.c \
+ tiger.c \
+ whirlpool.c whirlpool-sse2-amd64.S \
+ twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \
+ twofish-avx2-amd64.S \
+ rfc2268.c \
+ camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \
+ camellia-aesni-avx2-amd64.S camellia-arm.S camellia-aarch64.S \
+ blake2.c \
+ blake2b-amd64-avx2.S blake2s-amd64-avx.S
+
+@ENABLE_O_FLAG_MUNGING_FALSE@o_flag_munging = cat
+@ENABLE_O_FLAG_MUNGING_TRUE@o_flag_munging = sed -e 's/-O\([2-9sg][2-9sg]*\)/-O1/' -e 's/-Ofast/-O1/g'
+@ENABLE_INSTRUMENTATION_MUNGING_FALSE@instrumentation_munging = cat
+
+# We need to disable instrumentation for these modules as they use cc as a
+# thin assembly front-end and do not tolerate in-between function calls
+# inserted by the compiler, as those functions may clobber the XMM registers.
+@ENABLE_INSTRUMENTATION_MUNGING_TRUE@instrumentation_munging = sed \
+@ENABLE_INSTRUMENTATION_MUNGING_TRUE@ -e 's/-fsanitize[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \
+@ENABLE_INSTRUMENTATION_MUNGING_TRUE@ -e 's/-fprofile[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \
+@ENABLE_INSTRUMENTATION_MUNGING_TRUE@ -e 's/-fcoverage[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g'
+
+@ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS_FALSE@ppc_vcrypto_cflags =
+@ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS_TRUE@ppc_vcrypto_cflags = -maltivec -mvsx -mcrypto
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .S .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu cipher/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu cipher/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+ @list='$(noinst_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+libcipher.la: $(libcipher_la_OBJECTS) $(libcipher_la_DEPENDENCIES) $(EXTRA_libcipher_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(LINK) $(libcipher_la_OBJECTS) $(libcipher_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/arcfour-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/arcfour.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blake2.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blake2b-amd64-avx2.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blake2s-amd64-avx.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blowfish-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blowfish-arm.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blowfish.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-aarch64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-aesni-avx-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-aesni-avx2-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-arm.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-glue.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cast5-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cast5-arm.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cast5.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-aarch64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-amd64-avx2.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-amd64-ssse3.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-armv7-neon.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-ppc.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-s390x.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-aeswrap.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-cbc.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-ccm.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-cfb.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-cmac.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-ctr.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-eax.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm-armv7-neon.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm-armv8-aarch32-ce.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm-intel-pclmul.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-ocb.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-ofb.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-poly1305.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-selftest.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-xts.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crc-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crc-armv8-ce.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crc-intel-pclmul.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crc-ppc.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crc.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/des-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/des.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dsa-common.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dsa.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-curves.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-ecdh.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-ecdsa.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-eddsa.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-gost.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-misc.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-sm2.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elgamal.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gost28147.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gostr3411-94.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hash-common.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/idea.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kdf.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/keccak-armv7-neon.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/keccak.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac-cmac.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac-gmac.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac-hmac.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac-poly1305.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md4.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/poly1305-s390x.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/poly1305.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/primegen.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pubkey-util.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pubkey.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rfc2268.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-aarch64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-aesni.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-arm.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-armv8-aarch32-ce.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-armv8-ce.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-padlock.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-ppc.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-ppc9le.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-s390x.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-ssse3-amd64-asm.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-ssse3-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rmd160.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rsa-common.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rsa.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/salsa20-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/salsa20-armv7-neon.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/salsa20.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scrypt.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/seed.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/serpent-armv7-neon.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/serpent-avx2-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/serpent-sse2-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/serpent.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-armv7-neon.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-armv8-aarch32-ce.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-avx-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-avx-bmi2-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-avx2-bmi2-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-intel-shaext.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-ssse3-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-armv8-aarch32-ce.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-avx-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-avx2-bmi2-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-intel-shaext.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-ppc.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-ssse3-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-arm.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-armv7-neon.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-avx-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-avx2-bmi2-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-ppc.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-ssse3-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-ssse3-i386.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sm3.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sm4-aesni-avx-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sm4-aesni-avx2-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sm4.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stribog.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tiger.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish-aarch64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish-arm.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish-avx2-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/whirlpool-sse2-amd64.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/whirlpool.Plo@am__quote@ # am--include-marker
+
+$(am__depfiles_remade):
+ @$(MKDIR_P) $(@D)
+ @echo '# dummy' >$@-t && $(am__mv) $@-t $@
+
+am--depfiles: $(am__depfiles_remade)
+
+.S.o:
+@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ $<
+
+.S.obj:
+@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.S.lo:
+@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(LTCPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(LTCPPASCOMPILE) -c -o $@ $<
+
+.c.o:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+ -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+ -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -f ./$(DEPDIR)/arcfour-amd64.Plo
+ -rm -f ./$(DEPDIR)/arcfour.Plo
+ -rm -f ./$(DEPDIR)/blake2.Plo
+ -rm -f ./$(DEPDIR)/blake2b-amd64-avx2.Plo
+ -rm -f ./$(DEPDIR)/blake2s-amd64-avx.Plo
+ -rm -f ./$(DEPDIR)/blowfish-amd64.Plo
+ -rm -f ./$(DEPDIR)/blowfish-arm.Plo
+ -rm -f ./$(DEPDIR)/blowfish.Plo
+ -rm -f ./$(DEPDIR)/camellia-aarch64.Plo
+ -rm -f ./$(DEPDIR)/camellia-aesni-avx-amd64.Plo
+ -rm -f ./$(DEPDIR)/camellia-aesni-avx2-amd64.Plo
+ -rm -f ./$(DEPDIR)/camellia-arm.Plo
+ -rm -f ./$(DEPDIR)/camellia-glue.Plo
+ -rm -f ./$(DEPDIR)/camellia.Plo
+ -rm -f ./$(DEPDIR)/cast5-amd64.Plo
+ -rm -f ./$(DEPDIR)/cast5-arm.Plo
+ -rm -f ./$(DEPDIR)/cast5.Plo
+ -rm -f ./$(DEPDIR)/chacha20-aarch64.Plo
+ -rm -f ./$(DEPDIR)/chacha20-amd64-avx2.Plo
+ -rm -f ./$(DEPDIR)/chacha20-amd64-ssse3.Plo
+ -rm -f ./$(DEPDIR)/chacha20-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/chacha20-ppc.Plo
+ -rm -f ./$(DEPDIR)/chacha20-s390x.Plo
+ -rm -f ./$(DEPDIR)/chacha20.Plo
+ -rm -f ./$(DEPDIR)/cipher-aeswrap.Plo
+ -rm -f ./$(DEPDIR)/cipher-cbc.Plo
+ -rm -f ./$(DEPDIR)/cipher-ccm.Plo
+ -rm -f ./$(DEPDIR)/cipher-cfb.Plo
+ -rm -f ./$(DEPDIR)/cipher-cmac.Plo
+ -rm -f ./$(DEPDIR)/cipher-ctr.Plo
+ -rm -f ./$(DEPDIR)/cipher-eax.Plo
+ -rm -f ./$(DEPDIR)/cipher-gcm-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/cipher-gcm-armv8-aarch32-ce.Plo
+ -rm -f ./$(DEPDIR)/cipher-gcm-armv8-aarch64-ce.Plo
+ -rm -f ./$(DEPDIR)/cipher-gcm-intel-pclmul.Plo
+ -rm -f ./$(DEPDIR)/cipher-gcm.Plo
+ -rm -f ./$(DEPDIR)/cipher-ocb.Plo
+ -rm -f ./$(DEPDIR)/cipher-ofb.Plo
+ -rm -f ./$(DEPDIR)/cipher-poly1305.Plo
+ -rm -f ./$(DEPDIR)/cipher-selftest.Plo
+ -rm -f ./$(DEPDIR)/cipher-xts.Plo
+ -rm -f ./$(DEPDIR)/cipher.Plo
+ -rm -f ./$(DEPDIR)/crc-armv8-aarch64-ce.Plo
+ -rm -f ./$(DEPDIR)/crc-armv8-ce.Plo
+ -rm -f ./$(DEPDIR)/crc-intel-pclmul.Plo
+ -rm -f ./$(DEPDIR)/crc-ppc.Plo
+ -rm -f ./$(DEPDIR)/crc.Plo
+ -rm -f ./$(DEPDIR)/des-amd64.Plo
+ -rm -f ./$(DEPDIR)/des.Plo
+ -rm -f ./$(DEPDIR)/dsa-common.Plo
+ -rm -f ./$(DEPDIR)/dsa.Plo
+ -rm -f ./$(DEPDIR)/ecc-curves.Plo
+ -rm -f ./$(DEPDIR)/ecc-ecdh.Plo
+ -rm -f ./$(DEPDIR)/ecc-ecdsa.Plo
+ -rm -f ./$(DEPDIR)/ecc-eddsa.Plo
+ -rm -f ./$(DEPDIR)/ecc-gost.Plo
+ -rm -f ./$(DEPDIR)/ecc-misc.Plo
+ -rm -f ./$(DEPDIR)/ecc-sm2.Plo
+ -rm -f ./$(DEPDIR)/ecc.Plo
+ -rm -f ./$(DEPDIR)/elgamal.Plo
+ -rm -f ./$(DEPDIR)/gost28147.Plo
+ -rm -f ./$(DEPDIR)/gostr3411-94.Plo
+ -rm -f ./$(DEPDIR)/hash-common.Plo
+ -rm -f ./$(DEPDIR)/idea.Plo
+ -rm -f ./$(DEPDIR)/kdf.Plo
+ -rm -f ./$(DEPDIR)/keccak-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/keccak.Plo
+ -rm -f ./$(DEPDIR)/mac-cmac.Plo
+ -rm -f ./$(DEPDIR)/mac-gmac.Plo
+ -rm -f ./$(DEPDIR)/mac-hmac.Plo
+ -rm -f ./$(DEPDIR)/mac-poly1305.Plo
+ -rm -f ./$(DEPDIR)/mac.Plo
+ -rm -f ./$(DEPDIR)/md.Plo
+ -rm -f ./$(DEPDIR)/md4.Plo
+ -rm -f ./$(DEPDIR)/md5.Plo
+ -rm -f ./$(DEPDIR)/poly1305-s390x.Plo
+ -rm -f ./$(DEPDIR)/poly1305.Plo
+ -rm -f ./$(DEPDIR)/primegen.Plo
+ -rm -f ./$(DEPDIR)/pubkey-util.Plo
+ -rm -f ./$(DEPDIR)/pubkey.Plo
+ -rm -f ./$(DEPDIR)/rfc2268.Plo
+ -rm -f ./$(DEPDIR)/rijndael-aarch64.Plo
+ -rm -f ./$(DEPDIR)/rijndael-aesni.Plo
+ -rm -f ./$(DEPDIR)/rijndael-amd64.Plo
+ -rm -f ./$(DEPDIR)/rijndael-arm.Plo
+ -rm -f ./$(DEPDIR)/rijndael-armv8-aarch32-ce.Plo
+ -rm -f ./$(DEPDIR)/rijndael-armv8-aarch64-ce.Plo
+ -rm -f ./$(DEPDIR)/rijndael-armv8-ce.Plo
+ -rm -f ./$(DEPDIR)/rijndael-padlock.Plo
+ -rm -f ./$(DEPDIR)/rijndael-ppc.Plo
+ -rm -f ./$(DEPDIR)/rijndael-ppc9le.Plo
+ -rm -f ./$(DEPDIR)/rijndael-s390x.Plo
+ -rm -f ./$(DEPDIR)/rijndael-ssse3-amd64-asm.Plo
+ -rm -f ./$(DEPDIR)/rijndael-ssse3-amd64.Plo
+ -rm -f ./$(DEPDIR)/rijndael.Plo
+ -rm -f ./$(DEPDIR)/rmd160.Plo
+ -rm -f ./$(DEPDIR)/rsa-common.Plo
+ -rm -f ./$(DEPDIR)/rsa.Plo
+ -rm -f ./$(DEPDIR)/salsa20-amd64.Plo
+ -rm -f ./$(DEPDIR)/salsa20-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/salsa20.Plo
+ -rm -f ./$(DEPDIR)/scrypt.Plo
+ -rm -f ./$(DEPDIR)/seed.Plo
+ -rm -f ./$(DEPDIR)/serpent-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/serpent-avx2-amd64.Plo
+ -rm -f ./$(DEPDIR)/serpent-sse2-amd64.Plo
+ -rm -f ./$(DEPDIR)/serpent.Plo
+ -rm -f ./$(DEPDIR)/sha1-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/sha1-armv8-aarch32-ce.Plo
+ -rm -f ./$(DEPDIR)/sha1-armv8-aarch64-ce.Plo
+ -rm -f ./$(DEPDIR)/sha1-avx-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha1-avx-bmi2-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha1-avx2-bmi2-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha1-intel-shaext.Plo
+ -rm -f ./$(DEPDIR)/sha1-ssse3-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha1.Plo
+ -rm -f ./$(DEPDIR)/sha256-armv8-aarch32-ce.Plo
+ -rm -f ./$(DEPDIR)/sha256-armv8-aarch64-ce.Plo
+ -rm -f ./$(DEPDIR)/sha256-avx-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha256-avx2-bmi2-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha256-intel-shaext.Plo
+ -rm -f ./$(DEPDIR)/sha256-ppc.Plo
+ -rm -f ./$(DEPDIR)/sha256-ssse3-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha256.Plo
+ -rm -f ./$(DEPDIR)/sha512-arm.Plo
+ -rm -f ./$(DEPDIR)/sha512-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/sha512-avx-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha512-avx2-bmi2-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha512-ppc.Plo
+ -rm -f ./$(DEPDIR)/sha512-ssse3-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha512-ssse3-i386.Plo
+ -rm -f ./$(DEPDIR)/sha512.Plo
+ -rm -f ./$(DEPDIR)/sm3.Plo
+ -rm -f ./$(DEPDIR)/sm4-aesni-avx-amd64.Plo
+ -rm -f ./$(DEPDIR)/sm4-aesni-avx2-amd64.Plo
+ -rm -f ./$(DEPDIR)/sm4.Plo
+ -rm -f ./$(DEPDIR)/stribog.Plo
+ -rm -f ./$(DEPDIR)/tiger.Plo
+ -rm -f ./$(DEPDIR)/twofish-aarch64.Plo
+ -rm -f ./$(DEPDIR)/twofish-amd64.Plo
+ -rm -f ./$(DEPDIR)/twofish-arm.Plo
+ -rm -f ./$(DEPDIR)/twofish-avx2-amd64.Plo
+ -rm -f ./$(DEPDIR)/twofish.Plo
+ -rm -f ./$(DEPDIR)/whirlpool-sse2-amd64.Plo
+ -rm -f ./$(DEPDIR)/whirlpool.Plo
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f ./$(DEPDIR)/arcfour-amd64.Plo
+ -rm -f ./$(DEPDIR)/arcfour.Plo
+ -rm -f ./$(DEPDIR)/blake2.Plo
+ -rm -f ./$(DEPDIR)/blake2b-amd64-avx2.Plo
+ -rm -f ./$(DEPDIR)/blake2s-amd64-avx.Plo
+ -rm -f ./$(DEPDIR)/blowfish-amd64.Plo
+ -rm -f ./$(DEPDIR)/blowfish-arm.Plo
+ -rm -f ./$(DEPDIR)/blowfish.Plo
+ -rm -f ./$(DEPDIR)/camellia-aarch64.Plo
+ -rm -f ./$(DEPDIR)/camellia-aesni-avx-amd64.Plo
+ -rm -f ./$(DEPDIR)/camellia-aesni-avx2-amd64.Plo
+ -rm -f ./$(DEPDIR)/camellia-arm.Plo
+ -rm -f ./$(DEPDIR)/camellia-glue.Plo
+ -rm -f ./$(DEPDIR)/camellia.Plo
+ -rm -f ./$(DEPDIR)/cast5-amd64.Plo
+ -rm -f ./$(DEPDIR)/cast5-arm.Plo
+ -rm -f ./$(DEPDIR)/cast5.Plo
+ -rm -f ./$(DEPDIR)/chacha20-aarch64.Plo
+ -rm -f ./$(DEPDIR)/chacha20-amd64-avx2.Plo
+ -rm -f ./$(DEPDIR)/chacha20-amd64-ssse3.Plo
+ -rm -f ./$(DEPDIR)/chacha20-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/chacha20-ppc.Plo
+ -rm -f ./$(DEPDIR)/chacha20-s390x.Plo
+ -rm -f ./$(DEPDIR)/chacha20.Plo
+ -rm -f ./$(DEPDIR)/cipher-aeswrap.Plo
+ -rm -f ./$(DEPDIR)/cipher-cbc.Plo
+ -rm -f ./$(DEPDIR)/cipher-ccm.Plo
+ -rm -f ./$(DEPDIR)/cipher-cfb.Plo
+ -rm -f ./$(DEPDIR)/cipher-cmac.Plo
+ -rm -f ./$(DEPDIR)/cipher-ctr.Plo
+ -rm -f ./$(DEPDIR)/cipher-eax.Plo
+ -rm -f ./$(DEPDIR)/cipher-gcm-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/cipher-gcm-armv8-aarch32-ce.Plo
+ -rm -f ./$(DEPDIR)/cipher-gcm-armv8-aarch64-ce.Plo
+ -rm -f ./$(DEPDIR)/cipher-gcm-intel-pclmul.Plo
+ -rm -f ./$(DEPDIR)/cipher-gcm.Plo
+ -rm -f ./$(DEPDIR)/cipher-ocb.Plo
+ -rm -f ./$(DEPDIR)/cipher-ofb.Plo
+ -rm -f ./$(DEPDIR)/cipher-poly1305.Plo
+ -rm -f ./$(DEPDIR)/cipher-selftest.Plo
+ -rm -f ./$(DEPDIR)/cipher-xts.Plo
+ -rm -f ./$(DEPDIR)/cipher.Plo
+ -rm -f ./$(DEPDIR)/crc-armv8-aarch64-ce.Plo
+ -rm -f ./$(DEPDIR)/crc-armv8-ce.Plo
+ -rm -f ./$(DEPDIR)/crc-intel-pclmul.Plo
+ -rm -f ./$(DEPDIR)/crc-ppc.Plo
+ -rm -f ./$(DEPDIR)/crc.Plo
+ -rm -f ./$(DEPDIR)/des-amd64.Plo
+ -rm -f ./$(DEPDIR)/des.Plo
+ -rm -f ./$(DEPDIR)/dsa-common.Plo
+ -rm -f ./$(DEPDIR)/dsa.Plo
+ -rm -f ./$(DEPDIR)/ecc-curves.Plo
+ -rm -f ./$(DEPDIR)/ecc-ecdh.Plo
+ -rm -f ./$(DEPDIR)/ecc-ecdsa.Plo
+ -rm -f ./$(DEPDIR)/ecc-eddsa.Plo
+ -rm -f ./$(DEPDIR)/ecc-gost.Plo
+ -rm -f ./$(DEPDIR)/ecc-misc.Plo
+ -rm -f ./$(DEPDIR)/ecc-sm2.Plo
+ -rm -f ./$(DEPDIR)/ecc.Plo
+ -rm -f ./$(DEPDIR)/elgamal.Plo
+ -rm -f ./$(DEPDIR)/gost28147.Plo
+ -rm -f ./$(DEPDIR)/gostr3411-94.Plo
+ -rm -f ./$(DEPDIR)/hash-common.Plo
+ -rm -f ./$(DEPDIR)/idea.Plo
+ -rm -f ./$(DEPDIR)/kdf.Plo
+ -rm -f ./$(DEPDIR)/keccak-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/keccak.Plo
+ -rm -f ./$(DEPDIR)/mac-cmac.Plo
+ -rm -f ./$(DEPDIR)/mac-gmac.Plo
+ -rm -f ./$(DEPDIR)/mac-hmac.Plo
+ -rm -f ./$(DEPDIR)/mac-poly1305.Plo
+ -rm -f ./$(DEPDIR)/mac.Plo
+ -rm -f ./$(DEPDIR)/md.Plo
+ -rm -f ./$(DEPDIR)/md4.Plo
+ -rm -f ./$(DEPDIR)/md5.Plo
+ -rm -f ./$(DEPDIR)/poly1305-s390x.Plo
+ -rm -f ./$(DEPDIR)/poly1305.Plo
+ -rm -f ./$(DEPDIR)/primegen.Plo
+ -rm -f ./$(DEPDIR)/pubkey-util.Plo
+ -rm -f ./$(DEPDIR)/pubkey.Plo
+ -rm -f ./$(DEPDIR)/rfc2268.Plo
+ -rm -f ./$(DEPDIR)/rijndael-aarch64.Plo
+ -rm -f ./$(DEPDIR)/rijndael-aesni.Plo
+ -rm -f ./$(DEPDIR)/rijndael-amd64.Plo
+ -rm -f ./$(DEPDIR)/rijndael-arm.Plo
+ -rm -f ./$(DEPDIR)/rijndael-armv8-aarch32-ce.Plo
+ -rm -f ./$(DEPDIR)/rijndael-armv8-aarch64-ce.Plo
+ -rm -f ./$(DEPDIR)/rijndael-armv8-ce.Plo
+ -rm -f ./$(DEPDIR)/rijndael-padlock.Plo
+ -rm -f ./$(DEPDIR)/rijndael-ppc.Plo
+ -rm -f ./$(DEPDIR)/rijndael-ppc9le.Plo
+ -rm -f ./$(DEPDIR)/rijndael-s390x.Plo
+ -rm -f ./$(DEPDIR)/rijndael-ssse3-amd64-asm.Plo
+ -rm -f ./$(DEPDIR)/rijndael-ssse3-amd64.Plo
+ -rm -f ./$(DEPDIR)/rijndael.Plo
+ -rm -f ./$(DEPDIR)/rmd160.Plo
+ -rm -f ./$(DEPDIR)/rsa-common.Plo
+ -rm -f ./$(DEPDIR)/rsa.Plo
+ -rm -f ./$(DEPDIR)/salsa20-amd64.Plo
+ -rm -f ./$(DEPDIR)/salsa20-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/salsa20.Plo
+ -rm -f ./$(DEPDIR)/scrypt.Plo
+ -rm -f ./$(DEPDIR)/seed.Plo
+ -rm -f ./$(DEPDIR)/serpent-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/serpent-avx2-amd64.Plo
+ -rm -f ./$(DEPDIR)/serpent-sse2-amd64.Plo
+ -rm -f ./$(DEPDIR)/serpent.Plo
+ -rm -f ./$(DEPDIR)/sha1-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/sha1-armv8-aarch32-ce.Plo
+ -rm -f ./$(DEPDIR)/sha1-armv8-aarch64-ce.Plo
+ -rm -f ./$(DEPDIR)/sha1-avx-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha1-avx-bmi2-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha1-avx2-bmi2-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha1-intel-shaext.Plo
+ -rm -f ./$(DEPDIR)/sha1-ssse3-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha1.Plo
+ -rm -f ./$(DEPDIR)/sha256-armv8-aarch32-ce.Plo
+ -rm -f ./$(DEPDIR)/sha256-armv8-aarch64-ce.Plo
+ -rm -f ./$(DEPDIR)/sha256-avx-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha256-avx2-bmi2-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha256-intel-shaext.Plo
+ -rm -f ./$(DEPDIR)/sha256-ppc.Plo
+ -rm -f ./$(DEPDIR)/sha256-ssse3-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha256.Plo
+ -rm -f ./$(DEPDIR)/sha512-arm.Plo
+ -rm -f ./$(DEPDIR)/sha512-armv7-neon.Plo
+ -rm -f ./$(DEPDIR)/sha512-avx-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha512-avx2-bmi2-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha512-ppc.Plo
+ -rm -f ./$(DEPDIR)/sha512-ssse3-amd64.Plo
+ -rm -f ./$(DEPDIR)/sha512-ssse3-i386.Plo
+ -rm -f ./$(DEPDIR)/sha512.Plo
+ -rm -f ./$(DEPDIR)/sm3.Plo
+ -rm -f ./$(DEPDIR)/sm4-aesni-avx-amd64.Plo
+ -rm -f ./$(DEPDIR)/sm4-aesni-avx2-amd64.Plo
+ -rm -f ./$(DEPDIR)/sm4.Plo
+ -rm -f ./$(DEPDIR)/stribog.Plo
+ -rm -f ./$(DEPDIR)/tiger.Plo
+ -rm -f ./$(DEPDIR)/twofish-aarch64.Plo
+ -rm -f ./$(DEPDIR)/twofish-amd64.Plo
+ -rm -f ./$(DEPDIR)/twofish-arm.Plo
+ -rm -f ./$(DEPDIR)/twofish-avx2-amd64.Plo
+ -rm -f ./$(DEPDIR)/twofish.Plo
+ -rm -f ./$(DEPDIR)/whirlpool-sse2-amd64.Plo
+ -rm -f ./$(DEPDIR)/whirlpool.Plo
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
+ clean-generic clean-libtool clean-noinstLTLIBRARIES \
+ cscopelist-am ctags ctags-am distclean distclean-compile \
+ distclean-generic distclean-libtool distclean-tags distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-data install-data-am install-dvi install-dvi-am \
+ install-exec install-exec-am install-html install-html-am \
+ install-info install-info-am install-man install-pdf \
+ install-pdf-am install-ps install-ps-am install-strip \
+ installcheck installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-am uninstall uninstall-am
+
+.PRECIOUS: Makefile
+
+
+gost28147.lo: gost-sb.h
+gost-sb.h: gost-s-box
+ ./gost-s-box $@
+
+gost-s-box: gost-s-box.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(LDFLAGS_FOR_BUILD) \
+ $(CPPFLAGS_FOR_BUILD) -o $@ $(srcdir)/gost-s-box.c
+
+# We need to lower the optimization for this module.
+tiger.o: $(srcdir)/tiger.c Makefile
+ `echo $(COMPILE) -c $< | $(o_flag_munging) `
+
+tiger.lo: $(srcdir)/tiger.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(o_flag_munging) `
+
+rijndael-aesni.o: $(srcdir)/rijndael-aesni.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+rijndael-aesni.lo: $(srcdir)/rijndael-aesni.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+rijndael-ssse3-amd64.o: $(srcdir)/rijndael-ssse3-amd64.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+rijndael-ssse3-amd64.lo: $(srcdir)/rijndael-ssse3-amd64.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+cipher-gcm-intel-pclmul.o: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+cipher-gcm-intel-pclmul.lo: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+sha1-intel-shaext.o: $(srcdir)/sha1-intel-shaext.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+sha1-intel-shaext.lo: $(srcdir)/sha1-intel-shaext.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+sha256-intel-shaext.o: $(srcdir)/sha256-intel-shaext.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+sha256-intel-shaext.lo: $(srcdir)/sha256-intel-shaext.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+sha256-ssse3-i386.o: $(srcdir)/sha256-ssse3-i386.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+sha256-ssse3-i386.lo: $(srcdir)/sha256-ssse3-i386.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+crc-intel-pclmul.o: $(srcdir)/crc-intel-pclmul.c Makefile
+ `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+crc-intel-pclmul.lo: $(srcdir)/crc-intel-pclmul.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+rijndael-ppc.o: $(srcdir)/rijndael-ppc.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+rijndael-ppc.lo: $(srcdir)/rijndael-ppc.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+rijndael-ppc9le.o: $(srcdir)/rijndael-ppc9le.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+rijndael-ppc9le.lo: $(srcdir)/rijndael-ppc9le.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha256-ppc.o: $(srcdir)/sha256-ppc.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha256-ppc.lo: $(srcdir)/sha256-ppc.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha512-ppc.o: $(srcdir)/sha512-ppc.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha512-ppc.lo: $(srcdir)/sha512-ppc.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+chacha20-ppc.o: $(srcdir)/chacha20-ppc.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+chacha20-ppc.lo: $(srcdir)/chacha20-ppc.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+crc-ppc.o: $(srcdir)/crc-ppc.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+crc-ppc.lo: $(srcdir)/crc-ppc.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/comm/third_party/libgcrypt/cipher/arcfour-amd64.S b/comm/third_party/libgcrypt/cipher/arcfour-amd64.S
new file mode 100644
index 0000000000..221dfeff77
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/arcfour-amd64.S
@@ -0,0 +1,108 @@
+/*
+** RC4 implementation optimized for AMD64.
+**
+** Author: Marc Bevand <bevand_m (at) epita.fr>
+** Licence: I hereby disclaim the copyright on this code and place it
+** in the public domain.
+**
+** The throughput achieved by this code is about 320 MBytes/sec, on
+** a 1.8 GHz AMD Opteron (rev C0) processor.
+**
+** 2013/12/20 <jussi.kivilinna@iki.fi>:
+** - Integrated to libgcrypt
+** - 4.18 cycles/byte on Intel i5-4570
+*/
+
+#ifdef __x86_64__
+#include <config.h>
+#if defined(USE_ARCFOUR) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+.align 16
+.globl _gcry_arcfour_amd64
+ELF(.type _gcry_arcfour_amd64,@function)
+_gcry_arcfour_amd64:
+ CFI_STARTPROC()
+ ENTER_SYSV_FUNC_PARAMS_0_4
+ push %rbp
+ CFI_PUSH(%rbp)
+ push %rbx
+ CFI_PUSH(%rbx)
+ mov %rdi, %rbp # key = ARG(key)
+ mov %rsi, %rbx # rbx = ARG(len)
+ mov %rdx, %rsi # in = ARG(in)
+ mov %rcx, %rdi # out = ARG(out)
+ mov (4*256)(%rbp), %ecx # x = key->x
+ mov (4*256+4)(%rbp),%edx # y = key->y
+ inc %rcx # x++
+ and $255, %rcx # x &= 0xff
+ lea -8(%rbx,%rsi), %rbx # rbx = in+len-8
+ mov %rbx, %r9 # tmp = in+len-8
+ mov (%rbp,%rcx,4), %eax # tx = d[x]
+ cmp %rsi, %rbx # cmp in with in+len-8
+ jl .Lend # jump if (in+len-8 < in)
+
+.Lstart:
+ add $8, %rsi # increment in
+ add $8, %rdi # increment out
+
+ # generate the next 8 bytes of the rc4 stream into %r8
+ mov $8, %r11 # byte counter
+1: add %al, %dl # y += tx
+ mov (%rbp,%rdx,4), %ebx # ty = d[y]
+ mov %ebx, (%rbp,%rcx,4) # d[x] = ty
+ add %al, %bl # val = ty + tx
+ mov %eax, (%rbp,%rdx,4) # d[y] = tx
+ inc %cl # x++ (NEXT ROUND)
+ mov (%rbp,%rcx,4), %eax # tx = d[x] (NEXT ROUND)
+ shl $8, %r8
+ movb (%rbp,%rbx,4), %r8b # val = d[val]
+ dec %r11b
+ jnz 1b
+
+ # xor 8 bytes
+ bswap %r8
+ xor -8(%rsi), %r8
+ cmp %r9, %rsi # cmp in+len-8 with in
+ mov %r8, -8(%rdi)
+ jle .Lstart # jump if (in <= in+len-8)
+
+.Lend:
+ add $8, %r9 # tmp = in+len
+
+ # handle the last bytes, one by one
+1: cmp %rsi, %r9 # cmp in with in+len
+ jle .Lfinished # jump if (in+len <= in)
+ add %al, %dl # y += tx
+ mov (%rbp,%rdx,4), %ebx # ty = d[y]
+ mov %ebx, (%rbp,%rcx,4) # d[x] = ty
+ add %al, %bl # val = ty + tx
+ mov %eax, (%rbp,%rdx,4) # d[y] = tx
+ inc %cl # x++ (NEXT ROUND)
+ mov (%rbp,%rcx,4), %eax # tx = d[x] (NEXT ROUND)
+ movb (%rbp,%rbx,4), %r8b # val = d[val]
+ xor (%rsi), %r8b # xor 1 byte
+ movb %r8b, (%rdi)
+ inc %rsi # in++
+ inc %rdi # out++
+ jmp 1b
+
+.Lfinished:
+ dec %rcx # x--
+ movb %cl, (4*256)(%rbp) # key->x = x
+ movb %dl, (4*256+4)(%rbp) # key->y = y
+ pop %rbx
+ CFI_POP(%rbx)
+ pop %rbp
+ CFI_POP(%rbp)
+ EXIT_SYSV_FUNC
+ ret
+ CFI_ENDPROC()
+.L__gcry_arcfour_amd64_end:
+ELF(.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64)
+
+#endif
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/arcfour.c b/comm/third_party/libgcrypt/cipher/arcfour.c
new file mode 100644
index 0000000000..353de00bd7
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/arcfour.c
@@ -0,0 +1,216 @@
+/* arcfour.c - The arcfour stream cipher
+ * Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * For a description of the algorithm, see:
+ * Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996.
+ * ISBN 0-471-11709-9. Pages 397 ff.
+ */
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "cipher-internal.h"
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+static const char *selftest(void);
+
+#ifdef USE_AMD64_ASM
+
+typedef struct {
+ u32 sbox[256];
+ u32 idx_i, idx_j;
+} ARCFOUR_context;
+
+void _gcry_arcfour_amd64(void *key, size_t len, const byte *indata,
+ byte *outdata);
+
+static void
+encrypt_stream (void *context,
+ byte *outbuf, const byte *inbuf, size_t length)
+{
+ _gcry_arcfour_amd64 (context, length, inbuf, outbuf );
+}
+
+#else /*!USE_AMD64_ASM*/
+
+typedef struct {
+ byte sbox[256];
+ int idx_i, idx_j;
+} ARCFOUR_context;
+
+static void
+do_encrypt_stream( ARCFOUR_context *ctx,
+ byte *outbuf, const byte *inbuf, size_t length )
+{
+#ifndef __i386__
+ register unsigned int i = ctx->idx_i;
+ register byte j = ctx->idx_j;
+ register byte *sbox = ctx->sbox;
+ register byte t, u;
+
+ while ( length-- )
+ {
+ i++;
+ t = sbox[(byte)i];
+ j += t;
+ u = sbox[j];
+ sbox[(byte)i] = u;
+ u += t;
+ sbox[j] = t;
+ *outbuf++ = sbox[u] ^ *inbuf++;
+ }
+
+ ctx->idx_i = (byte)i;
+ ctx->idx_j = (byte)j;
+#else /*__i386__*/
+ /* The old arcfour implementation is faster on i386 than the version above,
+ * because the version above increases register pressure, which on i386
+ * would push some of the variables to memory/stack. Therefore keep this
+ * version for i386 to avoid regressing performance. */
+ register int i = ctx->idx_i;
+ register int j = ctx->idx_j;
+ register byte *sbox = ctx->sbox;
+ register int t;
+
+ while ( length-- )
+ {
+ i++;
+ i = i & 255; /* The and-op seems to be faster than the mod-op. */
+ j += sbox[i];
+ j &= 255;
+ t = sbox[i]; sbox[i] = sbox[j]; sbox[j] = t;
+ *outbuf++ = *inbuf++ ^ sbox[(sbox[i] + sbox[j]) & 255];
+ }
+
+ ctx->idx_i = i;
+ ctx->idx_j = j;
+#endif
+}
+
+static void
+encrypt_stream (void *context,
+ byte *outbuf, const byte *inbuf, size_t length)
+{
+ ARCFOUR_context *ctx = (ARCFOUR_context *) context;
+ do_encrypt_stream (ctx, outbuf, inbuf, length );
+ _gcry_burn_stack (64);
+}
+
+#endif /*!USE_AMD64_ASM*/
+
+
+static gcry_err_code_t
+do_arcfour_setkey (void *context, const byte *key, unsigned int keylen)
+{
+ static int initialized;
+ static const char* selftest_failed;
+ int i, j;
+ byte karr[256];
+ ARCFOUR_context *ctx = (ARCFOUR_context *) context;
+
+ if (!initialized )
+ {
+ initialized = 1;
+ selftest_failed = selftest();
+ if( selftest_failed )
+ log_error ("ARCFOUR selftest failed (%s)\n", selftest_failed );
+ }
+ if( selftest_failed )
+ return GPG_ERR_SELFTEST_FAILED;
+
+ if( keylen < 40/8 ) /* we want at least 40 bits */
+ return GPG_ERR_INV_KEYLEN;
+
+ ctx->idx_i = ctx->idx_j = 0;
+ for (i=0; i < 256; i++ )
+ ctx->sbox[i] = i;
+ for (i=j=0; i < 256; i++,j++ )
+ {
+ if (j >= keylen)
+ j = 0;
+ karr[i] = key[j];
+ }
+ for (i=j=0; i < 256; i++ )
+ {
+ int t;
+ j = (j + ctx->sbox[i] + karr[i]) & 255;
+ t = ctx->sbox[i];
+ ctx->sbox[i] = ctx->sbox[j];
+ ctx->sbox[j] = t;
+ }
+ wipememory( karr, sizeof(karr) );
+
+ return GPG_ERR_NO_ERROR;
+}
+
+static gcry_err_code_t
+arcfour_setkey ( void *context, const byte *key, unsigned int keylen,
+ cipher_bulk_ops_t *bulk_ops )
+{
+ ARCFOUR_context *ctx = (ARCFOUR_context *) context;
+ gcry_err_code_t rc = do_arcfour_setkey (ctx, key, keylen );
+ (void)bulk_ops;
+ return rc;
+}
+
+
+static const char*
+selftest(void)
+{
+ ARCFOUR_context ctx;
+ byte scratch[16];
+
+ /* Test vector from Cryptlib labeled there: "from the
+ State/Commerce Department". */
+ static const byte key_1[] =
+ { 0x61, 0x8A, 0x63, 0xD2, 0xFB };
+ static const byte plaintext_1[] =
+ { 0xDC, 0xEE, 0x4C, 0xF9, 0x2C };
+ static const byte ciphertext_1[] =
+ { 0xF1, 0x38, 0x29, 0xC9, 0xDE };
+
+ arcfour_setkey( &ctx, key_1, sizeof(key_1), NULL);
+ encrypt_stream( &ctx, scratch, plaintext_1, sizeof(plaintext_1));
+ if ( memcmp (scratch, ciphertext_1, sizeof (ciphertext_1)))
+ return "Arcfour encryption test 1 failed.";
+ arcfour_setkey( &ctx, key_1, sizeof(key_1), NULL);
+ encrypt_stream(&ctx, scratch, scratch, sizeof(plaintext_1)); /* decrypt */
+ if ( memcmp (scratch, plaintext_1, sizeof (plaintext_1)))
+ return "Arcfour decryption test 1 failed.";
+ return NULL;
+}
+
+
+gcry_cipher_spec_t _gcry_cipher_spec_arcfour =
+ {
+ GCRY_CIPHER_ARCFOUR, {0, 0},
+ "ARCFOUR", NULL, NULL, 1, 128, sizeof (ARCFOUR_context),
+ arcfour_setkey, NULL, NULL, encrypt_stream, encrypt_stream,
+ };
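The _gcry_cipher_spec_arcfour structure above is what libgcrypt's generic cipher layer dispatches to. As a usage illustration only (not part of this patch), callers normally reach this code through the public API; a minimal sketch, assuming a standard libgcrypt installation and eliding most error handling:

#include <stdio.h>
#include <gcrypt.h>

/* Minimal sketch: drive ARCFOUR through the public libgcrypt API. */
int
main (void)
{
  gcry_cipher_hd_t hd;
  gcry_error_t err;
  const char key[] = "0123456789abcdef";              /* 128-bit example key */
  unsigned char buf[5] = { 0xDC, 0xEE, 0x4C, 0xF9, 0x2C };

  gcry_check_version (NULL);                           /* initialize the library */

  err = gcry_cipher_open (&hd, GCRY_CIPHER_ARCFOUR, GCRY_CIPHER_MODE_STREAM, 0);
  if (err)
    {
      fprintf (stderr, "open: %s\n", gcry_strerror (err));
      return 1;
    }
  err = gcry_cipher_setkey (hd, key, sizeof key - 1);
  if (!err)
    err = gcry_cipher_encrypt (hd, buf, sizeof buf, NULL, 0);  /* in-place */
  if (err)
    fprintf (stderr, "arcfour: %s\n", gcry_strerror (err));
  gcry_cipher_close (hd);
  return err ? 1 : 0;
}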
diff --git a/comm/third_party/libgcrypt/cipher/asm-common-aarch64.h b/comm/third_party/libgcrypt/cipher/asm-common-aarch64.h
new file mode 100644
index 0000000000..cf0afe1f87
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/asm-common-aarch64.h
@@ -0,0 +1,104 @@
+/* asm-common-aarch64.h - Common macros for AArch64 assembly
+ *
+ * Copyright (C) 2018 Martin Storsjö <martin@martin.st>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_COMMON_AARCH64_H
+#define GCRY_ASM_COMMON_AARCH64_H
+
+#include <config.h>
+
+#ifdef HAVE_GCC_ASM_ELF_DIRECTIVES
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+#ifdef __APPLE__
+#define GET_DATA_POINTER(reg, name) \
+ adrp reg, name@GOTPAGE ; \
+ add reg, reg, name@GOTPAGEOFF ;
+#elif defined(_WIN32)
+#define GET_DATA_POINTER(reg, name) \
+ adrp reg, name ; \
+ add reg, reg, #:lo12:name ;
+#else
+#define GET_DATA_POINTER(reg, name) \
+ adrp reg, :got:name ; \
+ ldr reg, [reg, #:got_lo12:name] ;
+#endif
+
+#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES
+/* CFI directives to emit DWARF stack unwinding information. */
+# define CFI_STARTPROC() .cfi_startproc
+# define CFI_ENDPROC() .cfi_endproc
+# define CFI_REMEMBER_STATE() .cfi_remember_state
+# define CFI_RESTORE_STATE() .cfi_restore_state
+# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off
+# define CFI_REL_OFFSET(reg,off) .cfi_rel_offset reg, off
+# define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
+# define CFI_REGISTER(ro,rn) .cfi_register ro, rn
+# define CFI_RESTORE(reg) .cfi_restore reg
+
+/* CFA expressions are used for pointing CFA and registers to
+ * SP relative offsets. */
+# define DW_REGNO_SP 31
+
+/* Fixed length encoding used for integers for now. */
+# define DW_SLEB128_7BIT(value) \
+ 0x00|((value) & 0x7f)
+# define DW_SLEB128_28BIT(value) \
+ 0x80|((value)&0x7f), \
+ 0x80|(((value)>>7)&0x7f), \
+ 0x80|(((value)>>14)&0x7f), \
+ 0x00|(((value)>>21)&0x7f)
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) \
+ .cfi_escape \
+ 0x0f, /* DW_CFA_def_cfa_expression */ \
+ DW_SLEB128_7BIT(11), /* length */ \
+ 0x8f, /* DW_OP_breg31, rsp + constant */ \
+ DW_SLEB128_28BIT(rsp_offs), \
+ 0x06, /* DW_OP_deref */ \
+ 0x23, /* DW_OP_plus_constu */ \
+ DW_SLEB128_28BIT((cfa_depth)+8)
+
+# define CFI_REG_ON_STACK(regno,rsp_offs) \
+ .cfi_escape \
+ 0x10, /* DW_CFA_expression */ \
+ DW_SLEB128_7BIT(regno), \
+ DW_SLEB128_7BIT(5), /* length */ \
+ 0x8f, /* DW_OP_breg31, rsp + constant */ \
+ DW_SLEB128_28BIT(rsp_offs)
+
+#else
+# define CFI_STARTPROC()
+# define CFI_ENDPROC()
+# define CFI_REMEMBER_STATE()
+# define CFI_RESTORE_STATE()
+# define CFI_ADJUST_CFA_OFFSET(off)
+# define CFI_REL_OFFSET(reg,off)
+# define CFI_DEF_CFA_REGISTER(reg)
+# define CFI_REGISTER(ro,rn)
+# define CFI_RESTORE(reg)
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth)
+# define CFI_REG_ON_STACK(reg,rsp_offs)
+#endif
+
+#endif /* GCRY_ASM_COMMON_AARCH64_H */
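The CFI_CFA_ON_STACK and CFI_REG_ON_STACK escapes above hand the assembler a raw DWARF expression, and DW_SLEB128_28BIT emits each offset as a fixed four-byte little-endian base-128 sequence (continuation bit set on every byte except the last), so the expression length stated up front stays constant. A small sketch, not part of the patch, that round-trips this encoding for the small positive offsets the macros are used with:

#include <assert.h>
#include <stdio.h>

/* Mirror of the four bytes produced by DW_SLEB128_28BIT(value). */
static void
encode_28bit (unsigned long value, unsigned char out[4])
{
  out[0] = 0x80 | (value & 0x7f);
  out[1] = 0x80 | ((value >> 7) & 0x7f);
  out[2] = 0x80 | ((value >> 14) & 0x7f);
  out[3] = 0x00 | ((value >> 21) & 0x7f);
}

/* Generic LEB128 decode, sufficient for small positive stack offsets. */
static unsigned long
decode_leb128 (const unsigned char *p)
{
  unsigned long value = 0;
  int shift = 0;
  for (;;)
    {
      value |= (unsigned long)(*p & 0x7f) << shift;
      if (!(*p & 0x80))
        break;
      p++;
      shift += 7;
    }
  return value;
}

int
main (void)
{
  unsigned char buf[4];
  unsigned long offsets[] = { 0, 8, 160, 4096, (1ul << 27) - 1 };
  size_t i;

  for (i = 0; i < sizeof offsets / sizeof offsets[0]; i++)
    {
      encode_28bit (offsets[i], buf);
      assert (decode_leb128 (buf) == offsets[i]);
    }
  printf ("fixed-length LEB128 encoding round-trips\n");
  return 0;
}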
diff --git a/comm/third_party/libgcrypt/cipher/asm-common-amd64.h b/comm/third_party/libgcrypt/cipher/asm-common-amd64.h
new file mode 100644
index 0000000000..9d4a028a04
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/asm-common-amd64.h
@@ -0,0 +1,189 @@
+/* asm-common-amd64.h - Common macros for AMD64 assembly
+ *
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_COMMON_AMD64_H
+#define GCRY_ASM_COMMON_AMD64_H
+
+#include <config.h>
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+#ifdef __PIC__
+# define rRIP (%rip)
+#else
+# define rRIP
+#endif
+
+#ifdef __PIC__
+# define RIP %rip
+#else
+# define RIP
+#endif
+
+#ifdef __PIC__
+# define ADD_RIP +rip
+#else
+# define ADD_RIP
+#endif
+
+#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__)
+# define GET_EXTERN_POINTER(name, reg) movabsq $name, reg
+#else
+# ifdef __code_model_large__
+# define GET_EXTERN_POINTER(name, reg) \
+ pushq %r15; \
+ pushq %r14; \
+ 1: leaq 1b(%rip), reg; \
+ movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r14; \
+ movabsq $name@GOT, %r15; \
+ addq %r14, reg; \
+ popq %r14; \
+ movq (reg, %r15), reg; \
+ popq %r15;
+# else
+# define GET_EXTERN_POINTER(name, reg) movq name@GOTPCREL(%rip), reg
+# endif
+#endif
+
+#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES
+/* CFI directives to emit DWARF stack unwinding information. */
+# define CFI_STARTPROC() .cfi_startproc
+# define CFI_ENDPROC() .cfi_endproc
+# define CFI_REMEMBER_STATE() .cfi_remember_state
+# define CFI_RESTORE_STATE() .cfi_restore_state
+# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off
+# define CFI_REL_OFFSET(reg,off) .cfi_rel_offset reg, off
+# define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
+# define CFI_REGISTER(ro,rn) .cfi_register ro, rn
+# define CFI_RESTORE(reg) .cfi_restore reg
+
+# define CFI_PUSH(reg) \
+ CFI_ADJUST_CFA_OFFSET(8); CFI_REL_OFFSET(reg, 0)
+# define CFI_POP(reg) \
+ CFI_ADJUST_CFA_OFFSET(-8); CFI_RESTORE(reg)
+# define CFI_POP_TMP_REG() \
+ CFI_ADJUST_CFA_OFFSET(-8);
+# define CFI_LEAVE() \
+ CFI_ADJUST_CFA_OFFSET(-8); CFI_DEF_CFA_REGISTER(%rsp)
+
+/* CFA expressions are used for pointing CFA and registers to
+ * %rsp relative offsets. */
+# define DW_REGNO_rax 0
+# define DW_REGNO_rdx 1
+# define DW_REGNO_rcx 2
+# define DW_REGNO_rbx 3
+# define DW_REGNO_rsi 4
+# define DW_REGNO_rdi 5
+# define DW_REGNO_rbp 6
+# define DW_REGNO_rsp 7
+# define DW_REGNO_r8 8
+# define DW_REGNO_r9 9
+# define DW_REGNO_r10 10
+# define DW_REGNO_r11 11
+# define DW_REGNO_r12 12
+# define DW_REGNO_r13 13
+# define DW_REGNO_r14 14
+# define DW_REGNO_r15 15
+
+# define DW_REGNO(reg) DW_REGNO_ ## reg
+
+/* Fixed length encoding used for integers for now. */
+# define DW_SLEB128_7BIT(value) \
+ 0x00|((value) & 0x7f)
+# define DW_SLEB128_28BIT(value) \
+ 0x80|((value)&0x7f), \
+ 0x80|(((value)>>7)&0x7f), \
+ 0x80|(((value)>>14)&0x7f), \
+ 0x00|(((value)>>21)&0x7f)
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) \
+ .cfi_escape \
+ 0x0f, /* DW_CFA_def_cfa_expression */ \
+ DW_SLEB128_7BIT(11), /* length */ \
+ 0x77, /* DW_OP_breg7, rsp + constant */ \
+ DW_SLEB128_28BIT(rsp_offs), \
+ 0x06, /* DW_OP_deref */ \
+ 0x23, /* DW_OP_plus_constu */ \
+ DW_SLEB128_28BIT((cfa_depth)+8)
+
+# define CFI_REG_ON_STACK(reg,rsp_offs) \
+ .cfi_escape \
+ 0x10, /* DW_CFA_expression */ \
+ DW_SLEB128_7BIT(DW_REGNO(reg)), \
+ DW_SLEB128_7BIT(5), /* length */ \
+ 0x77, /* DW_OP_breg7, rsp + constant */ \
+ DW_SLEB128_28BIT(rsp_offs)
+
+#else
+# define CFI_STARTPROC()
+# define CFI_ENDPROC()
+# define CFI_REMEMBER_STATE()
+# define CFI_RESTORE_STATE()
+# define CFI_ADJUST_CFA_OFFSET(off)
+# define CFI_REL_OFFSET(reg,off)
+# define CFI_DEF_CFA_REGISTER(reg)
+# define CFI_REGISTER(ro,rn)
+# define CFI_RESTORE(reg)
+
+# define CFI_PUSH(reg)
+# define CFI_POP(reg)
+# define CFI_POP_TMP_REG()
+# define CFI_LEAVE()
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth)
+# define CFI_REG_ON_STACK(reg,rsp_offs)
+#endif
+
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ENTER_SYSV_FUNC_PARAMS_0_4 \
+ pushq %rdi; \
+ CFI_PUSH(%rdi); \
+ pushq %rsi; \
+ CFI_PUSH(%rsi); \
+ movq %rcx, %rdi; \
+ movq %rdx, %rsi; \
+ movq %r8, %rdx; \
+ movq %r9, %rcx; \
+
+# define ENTER_SYSV_FUNC_PARAMS_5 \
+ ENTER_SYSV_FUNC_PARAMS_0_4; \
+ movq 0x38(%rsp), %r8;
+
+# define ENTER_SYSV_FUNC_PARAMS_6 \
+ ENTER_SYSV_FUNC_PARAMS_5; \
+ movq 0x40(%rsp), %r9;
+
+# define EXIT_SYSV_FUNC \
+ popq %rsi; \
+ CFI_POP(%rsi); \
+ popq %rdi; \
+ CFI_POP(%rdi);
+#else
+# define ENTER_SYSV_FUNC_PARAMS_0_4
+# define ENTER_SYSV_FUNC_PARAMS_5
+# define ENTER_SYSV_FUNC_PARAMS_6
+# define EXIT_SYSV_FUNC
+#endif
+
+#endif /* GCRY_ASM_COMMON_AMD64_H */
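The ENTER_SYSV_FUNC_PARAMS_* / EXIT_SYSV_FUNC pairs let an assembly routine written against the SysV argument registers keep a plain C prototype even on Win64: on entry they save %rdi/%rsi and move the Win64 argument registers (%rcx, %rdx, %r8, %r9, plus stack arguments 5 and 6) into their SysV counterparts. The other convention used in this import is to leave the assembly SysV-only and do the conversion at the call site via a sysv_abi attribute on the prototype. A sketch contrasting the two styles; the prototype names come from the files in this patch (parameter types simplified), and the #if condition is a simplified stand-in for the configure-driven checks the real code uses:

#include <stddef.h>

/* 1. Conversion inside the assembly: arcfour-amd64.S starts with
 *    ENTER_SYSV_FUNC_PARAMS_0_4, so the C prototype stays plain. */
void _gcry_arcfour_amd64 (void *key, size_t len,
                          const unsigned char *indata, unsigned char *outdata);

/* 2. Conversion at the call site: the assembly is SysV-only and the prototype
 *    carries the sysv_abi attribute (the ASM_FUNC_ABI pattern in blake2.c). */
#if defined(_WIN64) && defined(__GNUC__)
# define ASM_FUNC_ABI __attribute__((sysv_abi))
#else
# define ASM_FUNC_ABI
#endif

unsigned int _gcry_blake2b_transform_amd64_avx2 (void *state,
                                                 const void *inblks,
                                                 size_t nblks) ASM_FUNC_ABI;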
diff --git a/comm/third_party/libgcrypt/cipher/asm-common-s390x.h b/comm/third_party/libgcrypt/cipher/asm-common-s390x.h
new file mode 100644
index 0000000000..b3a996cd6e
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/asm-common-s390x.h
@@ -0,0 +1,90 @@
+/* asm-common-s390x.h - Common macros for zSeries assembly
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_COMMON_S390X_H
+#define GCRY_ASM_COMMON_S390X_H
+
+#include <config.h>
+
+#ifdef HAVE_GCC_ASM_ELF_DIRECTIVES
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES
+/* CFI directives to emit DWARF stack unwinding information. */
+# define CFI_STARTPROC() .cfi_startproc
+# define CFI_ENDPROC() .cfi_endproc
+# define CFI_REMEMBER_STATE() .cfi_remember_state
+# define CFI_RESTORE_STATE() .cfi_restore_state
+# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off
+# define CFI_REL_OFFSET(reg,off) .cfi_rel_offset reg, off
+# define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
+# define CFI_REGISTER(ro,rn) .cfi_register ro, rn
+# define CFI_RESTORE(reg) .cfi_restore reg
+
+/* CFA expressions are used for pointing CFA and registers to
+ * SP relative offsets. */
+# define DW_REGNO_SP 15
+
+/* Fixed length encoding used for integers for now. */
+# define DW_SLEB128_7BIT(value) \
+ 0x00|((value) & 0x7f)
+# define DW_SLEB128_28BIT(value) \
+ 0x80|((value)&0x7f), \
+ 0x80|(((value)>>7)&0x7f), \
+ 0x80|(((value)>>14)&0x7f), \
+ 0x00|(((value)>>21)&0x7f)
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) \
+ .cfi_escape \
+ 0x0f, /* DW_CFA_def_cfa_expression */ \
+ DW_SLEB128_7BIT(11), /* length */ \
+ 0x7f, /* DW_OP_breg15, rsp + constant */ \
+ DW_SLEB128_28BIT(rsp_offs), \
+ 0x06, /* DW_OP_deref */ \
+ 0x23, /* DW_OP_plus_constu */ \
+ DW_SLEB128_28BIT((cfa_depth)+160)
+
+# define CFI_REG_ON_STACK(regno,rsp_offs) \
+ .cfi_escape \
+ 0x10, /* DW_CFA_expression */ \
+ DW_SLEB128_7BIT(regno), \
+ DW_SLEB128_7BIT(5), /* length */ \
+ 0x7f, /* DW_OP_breg15, rsp + constant */ \
+ DW_SLEB128_28BIT(rsp_offs)
+
+#else
+# define CFI_STARTPROC()
+# define CFI_ENDPROC()
+# define CFI_REMEMBER_STATE()
+# define CFI_RESTORE_STATE()
+# define CFI_ADJUST_CFA_OFFSET(off)
+# define CFI_REL_OFFSET(reg,off)
+# define CFI_DEF_CFA_REGISTER(reg)
+# define CFI_REGISTER(ro,rn)
+# define CFI_RESTORE(reg)
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth)
+# define CFI_REG_ON_STACK(reg,rsp_offs)
+#endif
+
+#endif /* GCRY_ASM_COMMON_S390X_H */
diff --git a/comm/third_party/libgcrypt/cipher/asm-inline-s390x.h b/comm/third_party/libgcrypt/cipher/asm-inline-s390x.h
new file mode 100644
index 0000000000..bacb45fe2e
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/asm-inline-s390x.h
@@ -0,0 +1,157 @@
+/* asm-inline-s390x.h - Common macros for zSeries inline assembly
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_INLINE_S390X_H
+#define GCRY_ASM_INLINE_S390X_H
+
+#include <config.h>
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+
+typedef unsigned int u128_t __attribute__ ((mode (TI)));
+
+enum kmxx_functions_e
+{
+ KM_FUNCTION_AES_128 = 18,
+ KM_FUNCTION_AES_192 = 19,
+ KM_FUNCTION_AES_256 = 20,
+ KM_FUNCTION_XTS_AES_128 = 50,
+ KM_FUNCTION_XTS_AES_256 = 52,
+
+ KMID_FUNCTION_SHA1 = 1,
+ KMID_FUNCTION_SHA256 = 2,
+ KMID_FUNCTION_SHA512 = 3,
+ KMID_FUNCTION_SHA3_224 = 32,
+ KMID_FUNCTION_SHA3_256 = 33,
+ KMID_FUNCTION_SHA3_384 = 34,
+ KMID_FUNCTION_SHA3_512 = 35,
+ KMID_FUNCTION_SHAKE128 = 36,
+ KMID_FUNCTION_SHAKE256 = 37,
+ KMID_FUNCTION_GHASH = 65,
+};
+
+enum kmxx_function_flags_e
+{
+ KM_ENCRYPT = 0 << 7,
+ KM_DECRYPT = 1 << 7,
+
+ KMF_LCFB_16 = 16 << 24,
+
+ KMA_LPC = 1 << 8,
+ KMA_LAAD = 1 << 9,
+ KMA_HS = 1 << 10,
+
+ KLMD_PADDING_STATE = 1 << 8,
+};
+
+static ALWAYS_INLINE u128_t km_function_to_mask(enum kmxx_functions_e func)
+{
+ return (u128_t)1 << (127 - func);
+}
+
+static inline u128_t kimd_query(void)
+{
+ static u128_t function_codes = 0;
+ static int initialized = 0;
+ register unsigned long reg0 asm("0") = 0;
+ register void *reg1 asm("1") = &function_codes;
+ u128_t r1;
+
+ if (initialized)
+ return function_codes;
+
+ asm volatile ("0: .insn rre,0xb93e << 16, 0, %[r1]\n\t"
+ " brc 1,0b\n\t"
+ : [r1] "=a" (r1)
+ : [reg0] "r" (reg0), [reg1] "r" (reg1)
+ : "cc", "memory");
+
+ initialized = 1;
+ return function_codes;
+}
+
+static inline u128_t klmd_query(void)
+{
+ static u128_t function_codes = 0;
+ static int initialized = 0;
+ register unsigned long reg0 asm("0") = 0;
+ register void *reg1 asm("1") = &function_codes;
+ u128_t r1;
+
+ if (initialized)
+ return function_codes;
+
+ asm volatile ("0: .insn rre,0xb93f << 16, 0, %[r1]\n\t"
+ " brc 1,0b\n\t"
+ : [r1] "=a" (r1)
+ : [reg0] "r" (reg0), [reg1] "r" (reg1)
+ : "cc", "memory");
+
+ initialized = 1;
+ return function_codes;
+}
+
+static ALWAYS_INLINE void
+kimd_execute(unsigned int func, void *param_block, const void *src,
+ size_t src_len)
+{
+ register unsigned long reg0 asm("0") = func;
+ register byte *reg1 asm("1") = param_block;
+ u128_t r1 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len;
+
+ asm volatile ("0: .insn rre,0xb93e << 16, 0, %[r1]\n\t"
+ " brc 1,0b\n\t"
+ : [r1] "+a" (r1)
+ : [func] "r" (reg0), [param_ptr] "r" (reg1)
+ : "cc", "memory");
+}
+
+static ALWAYS_INLINE void
+klmd_execute(unsigned int func, void *param_block, const void *src,
+ size_t src_len)
+{
+ register unsigned long reg0 asm("0") = func;
+ register byte *reg1 asm("1") = param_block;
+ u128_t r1 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len;
+
+ asm volatile ("0: .insn rre,0xb93f << 16, 0, %[r1]\n\t"
+ " brc 1,0b\n\t"
+ : [func] "+r" (reg0), [r1] "+a" (r1)
+ : [param_ptr] "r" (reg1)
+ : "cc", "memory");
+}
+
+static ALWAYS_INLINE void
+klmd_shake_execute(unsigned int func, void *param_block, void *dst,
+ size_t dst_len, const void *src, size_t src_len)
+{
+ register unsigned long reg0 asm("0") = func;
+ register byte *reg1 asm("1") = param_block;
+ u128_t r1 = ((u128_t)(uintptr_t)dst << 64) | (u64)dst_len;
+ u128_t r2 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len;
+
+ asm volatile ("0: .insn rre,0xb93f << 16, %[r1], %[r2]\n\t"
+ " brc 1,0b\n\t"
+ : [func] "+r" (reg0), [r1] "+a" (r1), [r2] "+a" (r2)
+ : [param_ptr] "r" (reg1)
+ : "cc", "memory");
+}
+
+#endif /* GCRY_ASM_INLINE_S390X_H */
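kimd_query() and klmd_query() above cache the 128-bit CPACF function-code bitmaps returned by the query calls, and km_function_to_mask() maps a function code to its bit (bit 127 - code). As a sketch of how a caller gates an accelerated path, assuming this header is included; this mirrors how the hardware paths are selected elsewhere in libgcrypt:

#include <config.h>
#include "asm-inline-s390x.h"

/* Sketch: test whether CPACF implements SHA-256 via the KIMD instruction. */
static int
have_cpacf_sha256 (void)
{
  /* The query bitmap has bit (127 - code) set for each implemented function
     code, which is exactly the bit km_function_to_mask() builds. */
  return (kimd_query () & km_function_to_mask (KMID_FUNCTION_SHA256)) != 0;
}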
diff --git a/comm/third_party/libgcrypt/cipher/asm-poly1305-aarch64.h b/comm/third_party/libgcrypt/cipher/asm-poly1305-aarch64.h
new file mode 100644
index 0000000000..9009270956
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/asm-poly1305-aarch64.h
@@ -0,0 +1,245 @@
+/* asm-poly1305-aarch64.h - Poly1305 macros for ARMv8/AArch64 assembly
+ *
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_POLY1305_AARCH64_H
+#define GCRY_ASM_POLY1305_AARCH64_H
+
+#include "asm-common-aarch64.h"
+
+#ifdef __AARCH64EL__
+ #define le_to_host(reg) /*_*/
+#else
+ #define le_to_host(reg) rev reg, reg;
+#endif
+
+/**********************************************************************
+ poly1305 for stitched chacha20-poly1305 Aarch64 implementations
+ **********************************************************************/
+
+#define POLY_RSTATE x8
+#define POLY_RSRC x9
+
+#define POLY_R_H0 x10
+#define POLY_R_H1 x11
+#define POLY_R_H2 x12
+#define POLY_R_H2d w12
+#define POLY_R_R0 x13
+#define POLY_R_R1 x14
+#define POLY_R_R1_MUL5 x15
+#define POLY_R_X0_HI x16
+#define POLY_R_X0_LO x17
+#define POLY_R_X1_HI x19
+#define POLY_R_X1_LO x20
+#define POLY_R_ONE x21
+#define POLY_R_ONEd w21
+
+#define POLY_TMP0 x22
+#define POLY_TMP1 x23
+#define POLY_TMP2 x24
+#define POLY_TMP3 x25
+
+#define POLY_CHACHA_ROUND x26
+
+#define POLY_S_R0 (4 * 4 + 0 * 8)
+#define POLY_S_R1 (4 * 4 + 1 * 8)
+#define POLY_S_H0 (4 * 4 + 2 * 8 + 0 * 8)
+#define POLY_S_H1 (4 * 4 + 2 * 8 + 1 * 8)
+#define POLY_S_H2d (4 * 4 + 2 * 8 + 2 * 8)
+
+#define POLY1305_PUSH_REGS() \
+ stp x19, x20, [sp, #-16]!; \
+ CFI_ADJUST_CFA_OFFSET(16); \
+ CFI_REG_ON_STACK(19, 0); \
+ CFI_REG_ON_STACK(20, 8); \
+ stp x21, x22, [sp, #-16]!; \
+ CFI_ADJUST_CFA_OFFSET(16); \
+ CFI_REG_ON_STACK(21, 0); \
+ CFI_REG_ON_STACK(22, 8); \
+ stp x23, x24, [sp, #-16]!; \
+ CFI_ADJUST_CFA_OFFSET(16); \
+ CFI_REG_ON_STACK(23, 0); \
+ CFI_REG_ON_STACK(24, 8); \
+ stp x25, x26, [sp, #-16]!; \
+ CFI_ADJUST_CFA_OFFSET(16); \
+ CFI_REG_ON_STACK(25, 0); \
+ CFI_REG_ON_STACK(26, 8);
+
+#define POLY1305_POP_REGS() \
+ ldp x25, x26, [sp], #16; \
+ CFI_ADJUST_CFA_OFFSET(-16); \
+ CFI_RESTORE(x25); \
+ CFI_RESTORE(x26); \
+ ldp x23, x24, [sp], #16; \
+ CFI_ADJUST_CFA_OFFSET(-16); \
+ CFI_RESTORE(x23); \
+ CFI_RESTORE(x24); \
+ ldp x21, x22, [sp], #16; \
+ CFI_ADJUST_CFA_OFFSET(-16); \
+ CFI_RESTORE(x21); \
+ CFI_RESTORE(x22); \
+ ldp x19, x20, [sp], #16; \
+ CFI_ADJUST_CFA_OFFSET(-16); \
+ CFI_RESTORE(x19); \
+ CFI_RESTORE(x20);
+
+#define POLY1305_LOAD_STATE() \
+ ldr POLY_R_R1, [POLY_RSTATE, #(POLY_S_R1)]; \
+ ldr POLY_R_H0, [POLY_RSTATE, #(POLY_S_H0)]; \
+ ldr POLY_R_H1, [POLY_RSTATE, #(POLY_S_H1)]; \
+ ldr POLY_R_H2d, [POLY_RSTATE, #(POLY_S_H2d)]; \
+ ldr POLY_R_R0, [POLY_RSTATE, #(POLY_S_R0)]; \
+ add POLY_R_R1_MUL5, POLY_R_R1, POLY_R_R1, lsr #2; \
+ mov POLY_R_ONE, #1;
+
+#define POLY1305_STORE_STATE() \
+ str POLY_R_H0, [POLY_RSTATE, #(POLY_S_H0)]; \
+ str POLY_R_H1, [POLY_RSTATE, #(POLY_S_H1)]; \
+ str POLY_R_H2d, [POLY_RSTATE, #(POLY_S_H2d)];
+
+#define POLY1305_BLOCK_PART1(src_offset) \
+ /* a = h + m */ \
+ ldr POLY_TMP0, [POLY_RSRC, #((src_offset) + 0 * 8)];
+#define POLY1305_BLOCK_PART2(src_offset) \
+ ldr POLY_TMP1, [POLY_RSRC, #((src_offset) + 1 * 8)];
+#define POLY1305_BLOCK_PART3() \
+ le_to_host(POLY_TMP0);
+#define POLY1305_BLOCK_PART4() \
+ le_to_host(POLY_TMP1);
+#define POLY1305_BLOCK_PART5() \
+ adds POLY_R_H0, POLY_R_H0, POLY_TMP0;
+#define POLY1305_BLOCK_PART6() \
+ adcs POLY_R_H1, POLY_R_H1, POLY_TMP1;
+#define POLY1305_BLOCK_PART7() \
+ adc POLY_R_H2d, POLY_R_H2d, POLY_R_ONEd;
+
+#define POLY1305_BLOCK_PART8() \
+ /* h = a * r (partial mod 2^130-5): */ \
+ mul POLY_R_X1_LO, POLY_R_H0, POLY_R_R1; /* lo: h0 * r1 */
+#define POLY1305_BLOCK_PART9() \
+ mul POLY_TMP0, POLY_R_H1, POLY_R_R0; /* lo: h1 * r0 */
+#define POLY1305_BLOCK_PART10() \
+ mul POLY_R_X0_LO, POLY_R_H0, POLY_R_R0; /* lo: h0 * r0 */
+#define POLY1305_BLOCK_PART11() \
+ umulh POLY_R_X1_HI, POLY_R_H0, POLY_R_R1; /* hi: h0 * r1 */
+#define POLY1305_BLOCK_PART12() \
+ adds POLY_R_X1_LO, POLY_R_X1_LO, POLY_TMP0;
+#define POLY1305_BLOCK_PART13() \
+ umulh POLY_TMP1, POLY_R_H1, POLY_R_R0; /* hi: h1 * r0 */
+#define POLY1305_BLOCK_PART14() \
+ mul POLY_TMP2, POLY_R_H1, POLY_R_R1_MUL5; /* lo: h1 * r1 mod 2^130-5 */
+#define POLY1305_BLOCK_PART15() \
+ umulh POLY_R_X0_HI, POLY_R_H0, POLY_R_R0; /* hi: h0 * r0 */
+#define POLY1305_BLOCK_PART16() \
+ adc POLY_R_X1_HI, POLY_R_X1_HI, POLY_TMP1;
+#define POLY1305_BLOCK_PART17() \
+ umulh POLY_TMP3, POLY_R_H1, POLY_R_R1_MUL5; /* hi: h1 * r1 mod 2^130-5 */
+#define POLY1305_BLOCK_PART18() \
+ adds POLY_R_X0_LO, POLY_R_X0_LO, POLY_TMP2;
+#define POLY1305_BLOCK_PART19() \
+ mul POLY_R_H1, POLY_R_H2, POLY_R_R1_MUL5; /* h2 * r1 mod 2^130-5 */
+#define POLY1305_BLOCK_PART20() \
+ adc POLY_R_X0_HI, POLY_R_X0_HI, POLY_TMP3;
+#define POLY1305_BLOCK_PART21() \
+ mul POLY_R_H2, POLY_R_H2, POLY_R_R0; /* h2 * r0 */
+#define POLY1305_BLOCK_PART22() \
+ adds POLY_R_H1, POLY_R_H1, POLY_R_X1_LO;
+#define POLY1305_BLOCK_PART23() \
+ adc POLY_R_H0, POLY_R_H2, POLY_R_X1_HI;
+
+#define POLY1305_BLOCK_PART24() \
+ /* carry propagation */ \
+ and POLY_R_H2, POLY_R_H0, #3;
+#define POLY1305_BLOCK_PART25() \
+ lsr POLY_R_H0, POLY_R_H0, #2;
+#define POLY1305_BLOCK_PART26() \
+ add POLY_R_H0, POLY_R_H0, POLY_R_H0, lsl #2;
+#define POLY1305_BLOCK_PART27() \
+ adds POLY_R_H0, POLY_R_H0, POLY_R_X0_LO;
+#define POLY1305_BLOCK_PART28() \
+ adcs POLY_R_H1, POLY_R_H1, POLY_R_X0_HI;
+#define POLY1305_BLOCK_PART29() \
+ adc POLY_R_H2d, POLY_R_H2d, wzr;
+
+//#define TESTING_POLY1305_ASM
+#ifdef TESTING_POLY1305_ASM
+/* for testing only. */
+.align 3
+.globl _gcry_poly1305_aarch64_blocks1
+ELF(.type _gcry_poly1305_aarch64_blocks1,%function;)
+_gcry_poly1305_aarch64_blocks1:
+ /* input:
+ * x0: poly1305-state
+ * x1: src
+ * x2: nblks
+ */
+ CFI_STARTPROC()
+ POLY1305_PUSH_REGS();
+
+ mov POLY_RSTATE, x0;
+ mov POLY_RSRC, x1;
+
+ POLY1305_LOAD_STATE();
+
+.L_gcry_poly1305_aarch64_loop1:
+ POLY1305_BLOCK_PART1(0 * 16);
+ POLY1305_BLOCK_PART2(0 * 16);
+ add POLY_RSRC, POLY_RSRC, #16;
+ POLY1305_BLOCK_PART3();
+ POLY1305_BLOCK_PART4();
+ POLY1305_BLOCK_PART5();
+ POLY1305_BLOCK_PART6();
+ POLY1305_BLOCK_PART7();
+ POLY1305_BLOCK_PART8();
+ POLY1305_BLOCK_PART9();
+ POLY1305_BLOCK_PART10();
+ POLY1305_BLOCK_PART11();
+ POLY1305_BLOCK_PART12();
+ POLY1305_BLOCK_PART13();
+ POLY1305_BLOCK_PART14();
+ POLY1305_BLOCK_PART15();
+ POLY1305_BLOCK_PART16();
+ POLY1305_BLOCK_PART17();
+ POLY1305_BLOCK_PART18();
+ POLY1305_BLOCK_PART19();
+ POLY1305_BLOCK_PART20();
+ POLY1305_BLOCK_PART21();
+ POLY1305_BLOCK_PART22();
+ POLY1305_BLOCK_PART23();
+ POLY1305_BLOCK_PART24();
+ POLY1305_BLOCK_PART25();
+ POLY1305_BLOCK_PART26();
+ POLY1305_BLOCK_PART27();
+ POLY1305_BLOCK_PART28();
+ POLY1305_BLOCK_PART29();
+
+ subs x2, x2, #1;
+ b.ne .L_gcry_poly1305_aarch64_loop1;
+
+ POLY1305_STORE_STATE();
+
+ mov x0, #0;
+
+ POLY1305_POP_REGS();
+ ret;
+ CFI_ENDPROC()
+ELF(.size _gcry_poly1305_aarch64_blocks1, .-_gcry_poly1305_aarch64_blocks1;)
+#endif
+
+#endif /* GCRY_ASM_POLY1305_AARCH64_H */
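The POLY1305_BLOCK_PART macros above compute one Poly1305 update, h = (h + m + 2^128) * r mod 2^130 - 5, split into scheduler-friendly pieces for interleaving with ChaCha20. The multiplication relies on the usual clamping of r: r1 is a multiple of 4, so the term h1*r1*2^128 folds down to h1*(r1 + (r1 >> 2)) because 2^130 is congruent to 5 modulo the prime; that is the POLY_R_R1_MUL5 value loaded in POLY1305_LOAD_STATE. A compact C sketch of the same schedule, not part of the patch, assuming GCC-style unsigned __int128, m[] holding the two 64-bit words of the 16-byte block already byte-swapped to host order, and r0/r1 clamped as usual for Poly1305:

typedef unsigned long long u64;
typedef unsigned __int128 u128;

/* One Poly1305 block on 64-bit limbs: h2:h1:h0 <- (h + m + 2^128) * r mod 2^130-5.
 * Follows POLY1305_BLOCK_PART1..29 above. */
static void
poly1305_block (u64 h[3], const u64 m[2], u64 r0, u64 r1)
{
  u64 r1_mul5 = r1 + (r1 >> 2);  /* 5*(r1/4); valid since clamping gives r1 % 4 == 0 */
  u64 h0 = h[0], h1 = h[1], h2 = h[2];
  u128 a0, a1, x0, x1, t1, f0, f1;
  u64 t2, top;

  /* a = h + m, plus the 2^128 padding bit (PART1..7) */
  a0 = (u128) h0 + m[0];
  a1 = (u128) h1 + m[1] + (u64) (a0 >> 64);
  h0 = (u64) a0;
  h1 = (u64) a1;
  h2 += (u64) (a1 >> 64) + 1;

  /* h * r with the 2^128-and-above terms folded down (PART8..23) */
  x1 = (u128) h0 * r1 + (u128) h1 * r0;        /* coefficient of 2^64  */
  x0 = (u128) h0 * r0 + (u128) h1 * r1_mul5;   /* coefficient of 2^0   */
  t1 = (u128) (h2 * r1_mul5) + (u64) x1;       /* 2^64 limb, pre-carry */
  t2 = h2 * r0 + (u64) (x1 >> 64) + (u64) (t1 >> 64);  /* 2^128 limb   */

  /* carry propagation: fold bits at or above 2^130 back in via *5 (PART24..29) */
  h2 = t2 & 3;
  top = t2 >> 2;
  f0 = (u128) top * 5 + (u64) x0;
  f1 = (u128) (u64) t1 + (u64) (x0 >> 64) + (u64) (f0 >> 64);
  h[0] = (u64) f0;
  h[1] = (u64) f1;
  h[2] = h2 + (u64) (f1 >> 64);
}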
diff --git a/comm/third_party/libgcrypt/cipher/asm-poly1305-amd64.h b/comm/third_party/libgcrypt/cipher/asm-poly1305-amd64.h
new file mode 100644
index 0000000000..3f99ea3e16
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/asm-poly1305-amd64.h
@@ -0,0 +1,171 @@
+/* asm-poly1305-amd64.h - Poly1305 macros for AMD64 assembly
+ *
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_POLY1305_AMD64_H
+#define GCRY_ASM_POLY1305_AMD64_H
+
+#include "asm-common-amd64.h"
+
+/**********************************************************************
+ poly1305 for stitched chacha20-poly1305 AMD64 implementations
+ **********************************************************************/
+
+#define POLY_RSTATE %r8
+#define POLY_RSRC %r9
+
+#define POLY_R_H0 %rbx
+#define POLY_R_H1 %rcx
+#define POLY_R_H2 %r10
+#define POLY_R_H2d %r10d
+#define POLY_R_R0 %r11
+#define POLY_R_R1_MUL5 %r12
+#define POLY_R_X0_HI %r13
+#define POLY_R_X0_LO %r14
+#define POLY_R_X1_HI %r15
+#define POLY_R_X1_LO %rsi
+
+#define POLY_S_R0 (4 * 4 + 0 * 8)(POLY_RSTATE)
+#define POLY_S_R1 (4 * 4 + 1 * 8)(POLY_RSTATE)
+#define POLY_S_H0 (4 * 4 + 2 * 8 + 0 * 8)(POLY_RSTATE)
+#define POLY_S_H1 (4 * 4 + 2 * 8 + 1 * 8)(POLY_RSTATE)
+#define POLY_S_H2d (4 * 4 + 2 * 8 + 2 * 8)(POLY_RSTATE)
+
+#define POLY1305_LOAD_STATE() \
+ movq POLY_S_H0, POLY_R_H0; \
+ movq POLY_S_H1, POLY_R_H1; \
+ movl POLY_S_H2d, POLY_R_H2d; \
+ movq POLY_S_R0, POLY_R_R0; \
+ movq POLY_S_R1, POLY_R_R1_MUL5; \
+ shrq $2, POLY_R_R1_MUL5; \
+ addq POLY_S_R1, POLY_R_R1_MUL5;
+
+#define POLY1305_STORE_STATE() \
+ movq POLY_R_H0, POLY_S_H0; \
+ movq POLY_R_H1, POLY_S_H1; \
+ movl POLY_R_H2d, POLY_S_H2d;
+
+/* a = h + m */
+#define POLY1305_BLOCK_PART1(src_offset) \
+ addq ((src_offset) + 0 * 8)(POLY_RSRC), POLY_R_H0; \
+ adcq ((src_offset) + 1 * 8)(POLY_RSRC), POLY_R_H1; \
+ adcl $1, POLY_R_H2d; \
+ \
+ /* h = a * r (partial mod 2^130-5): */ \
+ \
+ /* h0 * r1 */ \
+ movq POLY_R_H0, %rax; \
+ mulq POLY_S_R1; \
+ movq %rax, POLY_R_X1_LO; \
+ movq %rdx, POLY_R_X1_HI;
+
+#define POLY1305_BLOCK_PART2() \
+ \
+ /* h0 * r0 */ \
+ movq POLY_R_H0, %rax; \
+ mulq POLY_R_R0; \
+ movq %rax, POLY_R_X0_LO; \
+ movq %rdx, POLY_R_X0_HI;
+
+#define POLY1305_BLOCK_PART3() \
+ \
+ /* h1 * r0 */ \
+ movq POLY_R_H1, %rax; \
+ mulq POLY_R_R0; \
+ addq %rax, POLY_R_X1_LO; \
+ adcq %rdx, POLY_R_X1_HI; \
+ \
+ /* h1 * r1 mod 2^130-5 */ \
+ movq POLY_R_R1_MUL5, %rax; \
+ mulq POLY_R_H1;
+
+#define POLY1305_BLOCK_PART4() \
+ movq POLY_R_H2, POLY_R_H1; \
+ imulq POLY_R_R1_MUL5, POLY_R_H1; /* h2 * r1 mod 2^130-5 */ \
+ addq %rax, POLY_R_X0_LO; \
+ adcq %rdx, POLY_R_X0_HI; \
+ imulq POLY_R_R0, POLY_R_H2; /* h2 * r0 */ \
+ addq POLY_R_X1_LO, POLY_R_H1; \
+ adcq POLY_R_X1_HI, POLY_R_H2;
+
+#define POLY1305_BLOCK_PART5() \
+ \
+ /* carry propagation */ \
+ movq POLY_R_H2, POLY_R_H0; \
+ andl $3, POLY_R_H2d; \
+ shrq $2, POLY_R_H0; \
+ leaq (POLY_R_H0, POLY_R_H0, 4), POLY_R_H0; \
+ addq POLY_R_X0_LO, POLY_R_H0; \
+ adcq POLY_R_X0_HI, POLY_R_H1; \
+ adcl $0, POLY_R_H2d;
+
+#ifdef TESTING_POLY1305_ASM
+/* for testing only, mixed C/asm poly1305.c is marginally faster (~2%). */
+.align 8
+.globl _gcry_poly1305_amd64_ssse3_blocks1
+ELF(.type _gcry_poly1305_amd64_ssse3_blocks1,@function;)
+
+_gcry_poly1305_amd64_ssse3_blocks1:
+ /* input:
+ * %rdi: poly1305-state
+ * %rsi: src
+ * %rdx: nblks
+ */
+ pushq %rbp;
+ movq %rsp, %rbp;
+
+ subq $(10 * 8), %rsp;
+ movq %rbx, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ movq %r14, (4 * 8)(%rsp);
+ movq %r15, (5 * 8)(%rsp);
+
+ movq %rdx, (8 * 8)(%rsp); # NBLKS
+
+ movq %rdi, POLY_RSTATE;
+ movq %rsi, POLY_RSRC;
+
+ POLY1305_LOAD_STATE();
+
+.L_poly1:
+ POLY1305_BLOCK_PART1(0 * 16);
+ POLY1305_BLOCK_PART2();
+ POLY1305_BLOCK_PART3();
+ POLY1305_BLOCK_PART4();
+ POLY1305_BLOCK_PART5();
+
+ subq $1, (8 * 8)(%rsp); # NBLKS
+ leaq (16)(POLY_RSRC), POLY_RSRC;
+ jnz .L_poly1;
+
+ POLY1305_STORE_STATE();
+
+ movq (1 * 8)(%rsp), %rbx;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ movq (4 * 8)(%rsp), %r14;
+ movq (5 * 8)(%rsp), %r15;
+
+ xorl %eax, %eax;
+ leave
+ ret;
+#endif
+
+#endif /* GCRY_ASM_POLY1305_AMD64_H */
diff --git a/comm/third_party/libgcrypt/cipher/asm-poly1305-s390x.h b/comm/third_party/libgcrypt/cipher/asm-poly1305-s390x.h
new file mode 100644
index 0000000000..113ab94913
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/asm-poly1305-s390x.h
@@ -0,0 +1,140 @@
+/* asm-poly1305-s390x.h - Poly1305 macros for zSeries assembly
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_POLY1305_S390X_H
+#define GCRY_ASM_POLY1305_S390X_H
+
+#include "asm-common-s390x.h"
+
+/**********************************************************************
+ poly1305 for stitched chacha20-poly1305
+ **********************************************************************/
+
+#define POLY_RSTATE %r1
+#define POLY_RSRC %r14
+
+#define POLY_R_H0_TMP_HI %r6 // even-
+#define POLY_R_H0 %r7 // odd pair
+#define POLY_R_H1_TMP_HI %r8 // even-
+#define POLY_R_H1 %r9 // odd pair
+#define POLY_R_H2 %r10
+#define POLY_R_R0 %r11
+#define POLY_R_R1 %r12
+#define POLY_R_R1_MUL5 %r13
+#define POLY_R_X0_HI %r2 // even-
+#define POLY_R_X0_LO %r3 // odd pair
+#define POLY_R_X1_HI %r4 // even-
+#define POLY_R_X1_LO %r5 // odd pair
+
+#define POLY_S_R0 (4 * 4 + 0 * 8)(POLY_RSTATE)
+#define POLY_S_R1 (4 * 4 + 1 * 8)(POLY_RSTATE)
+#define POLY_S_H0 (4 * 4 + 2 * 8 + 0 * 8)(POLY_RSTATE)
+#define POLY_S_H1 (4 * 4 + 2 * 8 + 1 * 8)(POLY_RSTATE)
+#define POLY_S_H2d (4 * 4 + 2 * 8 + 2 * 8)(POLY_RSTATE)
+
+#define INC_POLY1305_SRC(a) \
+ aghi POLY_RSRC, (a);
+
+#define POLY1305_LOAD_STATE() \
+ lg POLY_R_H0, POLY_S_H0; \
+ lg POLY_R_H1, POLY_S_H1; \
+ llgf POLY_R_H2, POLY_S_H2d; \
+ rllg POLY_R_H0, POLY_R_H0, 32; \
+ rllg POLY_R_H1, POLY_R_H1, 32; \
+ lg POLY_R_R0, POLY_S_R0; \
+ lg POLY_R_R1, POLY_S_R1; \
+ rllg POLY_R_R0, POLY_R_R0, 32; \
+ rllg POLY_R_R1, POLY_R_R1, 32; \
+ srlg POLY_R_R1_MUL5, POLY_R_R1, 2; \
+ algr POLY_R_R1_MUL5, POLY_R_R1;
+
+#define POLY1305_STORE_STATE() \
+ rllg POLY_R_H0, POLY_R_H0, 32; \
+ rllg POLY_R_H1, POLY_R_H1, 32; \
+ stg POLY_R_H0, POLY_S_H0; \
+ stg POLY_R_H1, POLY_S_H1; \
+ st POLY_R_H2, POLY_S_H2d;
+
+/* a = h + m */
+#define POLY1305_BLOCK_PART1_HB(src_offset, high_pad) \
+ lrvg POLY_R_X0_HI, ((src_offset) + 1 * 8)(POLY_RSRC); \
+ lrvg POLY_R_X0_LO, ((src_offset) + 0 * 8)(POLY_RSRC); \
+ lghi POLY_R_H1_TMP_HI, (high_pad);
+
+#define POLY1305_BLOCK_PART1(src_offset) \
+ POLY1305_BLOCK_PART1_HB(src_offset, 1);
+
+#define POLY1305_BLOCK_PART2() \
+ algr POLY_R_H0, POLY_R_X0_LO; \
+ alcgr POLY_R_H1, POLY_R_X0_HI; \
+ alcgr POLY_R_H2, POLY_R_H1_TMP_HI; \
+ lgr POLY_R_X1_LO, POLY_R_H0; \
+ lgr POLY_R_X0_LO, POLY_R_H0;
+
+#define POLY1305_BLOCK_PART3() \
+ /* h = a * r (partial mod 2^130-5): */ \
+ \
+ /* h0 * r1 */ \
+ mlgr POLY_R_X1_HI, POLY_R_R1; \
+ \
+ /* h1 * r0 */ \
+ lgr POLY_R_H0, POLY_R_H1; \
+ mlgr POLY_R_H0_TMP_HI, POLY_R_R0; \
+ \
+ /* h1 * r1 mod 2^130-5 */ \
+ mlgr POLY_R_H1_TMP_HI, POLY_R_R1_MUL5;
+
+#define POLY1305_BLOCK_PART4() \
+ \
+ /* h0 * r0 */ \
+ mlgr POLY_R_X0_HI, POLY_R_R0; \
+ \
+ algr POLY_R_X1_LO, POLY_R_H0; \
+ alcgr POLY_R_X1_HI, POLY_R_H0_TMP_HI; \
+ \
+ lgr POLY_R_H0_TMP_HI, POLY_R_H2; \
+ msgr POLY_R_H0_TMP_HI, POLY_R_R1_MUL5; /* h2 * r1 mod 2^130-5 */ \
+ msgr POLY_R_H2, POLY_R_R0; /* h2 * r0 */
+
+#define POLY1305_BLOCK_PART5() \
+ \
+ algr POLY_R_X0_LO, POLY_R_H1; \
+ alcgr POLY_R_X0_HI, POLY_R_H1_TMP_HI;
+
+#define POLY1305_BLOCK_PART6() \
+ \
+ algrk POLY_R_H1, POLY_R_H0_TMP_HI, POLY_R_X1_LO; \
+ alcgr POLY_R_H2, POLY_R_X1_HI;
+
+#define POLY1305_BLOCK_PART7() \
+ \
+ /* carry propagation */ \
+ srlg POLY_R_H0, POLY_R_H2, 2; \
+ risbgn POLY_R_X1_LO, POLY_R_H2, 0, 0x80 | 61, 0; \
+ lghi POLY_R_H1_TMP_HI, 0; \
+ agr POLY_R_H0, POLY_R_X1_LO; \
+ risbgn POLY_R_H2, POLY_R_H2, 62, 0x80 | 63, 0;
+
+#define POLY1305_BLOCK_PART8() \
+ algr POLY_R_H0, POLY_R_X0_LO; \
+ alcgr POLY_R_H1, POLY_R_X0_HI; \
+ alcgr POLY_R_H2, POLY_R_H1_TMP_HI;
+
+#endif /* GCRY_ASM_POLY1305_S390X_H */
diff --git a/comm/third_party/libgcrypt/cipher/bithelp.h b/comm/third_party/libgcrypt/cipher/bithelp.h
new file mode 100644
index 0000000000..7793ce7ca3
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/bithelp.h
@@ -0,0 +1,123 @@
+/* bithelp.h - Some bit manipulation helpers
+ * Copyright (C) 1999, 2002 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef GCRYPT_BITHELP_H
+#define GCRYPT_BITHELP_H
+
+#include "types.h"
+
+
+/****************
+ * Rotate the 32 bit unsigned integer X by N bits left/right
+ */
+static inline u32 rol(u32 x, int n)
+{
+ return ( (x << (n&(32-1))) | (x >> ((32-n)&(32-1))) );
+}
+
+static inline u32 ror(u32 x, int n)
+{
+ return ( (x >> (n&(32-1))) | (x << ((32-n)&(32-1))) );
+}
+
+static inline u64 rol64(u64 x, int n)
+{
+ return ( (x << (n&(64-1))) | (x >> ((64-n)&(64-1))) );
+}
+
+/* Byte swap for 32-bit and 64-bit integers. If available, use compiler
+ provided helpers. */
+#ifdef HAVE_BUILTIN_BSWAP32
+# define _gcry_bswap32 __builtin_bswap32
+#else
+static inline u32
+_gcry_bswap32(u32 x)
+{
+ return ((rol(x, 8) & 0x00ff00ffL) | (ror(x, 8) & 0xff00ff00L));
+}
+#endif
+
+#ifdef HAVE_BUILTIN_BSWAP64
+# define _gcry_bswap64 __builtin_bswap64
+#else
+static inline u64
+_gcry_bswap64(u64 x)
+{
+ return ((u64)_gcry_bswap32(x) << 32) | (_gcry_bswap32(x >> 32));
+}
+#endif
+
+/* Endian dependent byte swap operations. */
+#ifdef WORDS_BIGENDIAN
+# define le_bswap32(x) _gcry_bswap32(x)
+# define be_bswap32(x) ((u32)(x))
+# define le_bswap64(x) _gcry_bswap64(x)
+# define be_bswap64(x) ((u64)(x))
+#else
+# define le_bswap32(x) ((u32)(x))
+# define be_bswap32(x) _gcry_bswap32(x)
+# define le_bswap64(x) ((u64)(x))
+# define be_bswap64(x) _gcry_bswap64(x)
+#endif
+
+
+/* Count trailing zero bits in an unsigned int. We return an int
+ because that is what gcc's builtin does. Returns the number of
+ bits in X if X is 0. */
+static inline int
+_gcry_ctz (unsigned int x)
+{
+#if defined (HAVE_BUILTIN_CTZ)
+ return x ? __builtin_ctz (x) : 8 * sizeof (x);
+#else
+ /* See
+ * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightModLookup
+ */
+ static const unsigned char mod37[] =
+ {
+ sizeof (unsigned int)*8,
+ 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13,
+ 4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9,
+ 5, 20, 8, 19, 18
+ };
+ return (int)mod37[(-x & x) % 37];
+#endif
+}
+
+
+/* Count trailing zero bits in an u64. We return an int because that
+ is what gcc's builtin does. Returns the number of bits in X if X
+ is 0. */
+static inline int
+_gcry_ctz64(u64 x)
+{
+#if defined (HAVE_BUILTIN_CTZL) && SIZEOF_UNSIGNED_LONG >= 8
+ return x ? __builtin_ctzl (x) : 8 * sizeof (x);
+#elif defined (HAVE_BUILTIN_CTZ) && SIZEOF_UNSIGNED_INT >= 8
+#warning hello
+ return x ? __builtin_ctz (x) : 8 * sizeof (x);
+#else
+ if ((x & 0xffffffff))
+ return _gcry_ctz (x);
+ else
+ return 32 + _gcry_ctz (x >> 32);
+#endif
+}
+
+
+#endif /*GCRYPT_BITHELP_H*/
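The table-based _gcry_ctz fallback above relies on the powers 2^0 through 2^31 all being distinct modulo 37, so isolating the lowest set bit with (-x & x) and reducing mod 37 gives a unique index into a 37-entry table. A small self-contained check, not part of the patch; the first table entry is written as 32, i.e. sizeof(unsigned int)*8 on the platforms this targets:

#include <assert.h>
#include <stdio.h>

/* Trailing-zero count via the mod-37 lookup used in bithelp.h. */
static int
ctz_mod37 (unsigned int x)
{
  static const unsigned char mod37[] =
    {
      32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13,
      4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9,
      5, 20, 8, 19, 18
    };
  return mod37[(-x & x) % 37];
}

int
main (void)
{
  unsigned int i;

  for (i = 0; i < 32; i++)
    assert (ctz_mod37 (1u << i) == (int) i);  /* single-bit inputs          */
  assert (ctz_mod37 (0) == 32);               /* zero maps to the bit width */
  assert (ctz_mod37 (0xb0u) == 4);            /* lowest set bit of 0xb0     */
  printf ("mod-37 ctz table matches the expected counts\n");
  return 0;
}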
diff --git a/comm/third_party/libgcrypt/cipher/blake2.c b/comm/third_party/libgcrypt/cipher/blake2.c
new file mode 100644
index 0000000000..f2bf49e522
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/blake2.c
@@ -0,0 +1,996 @@
+/* blake2.c - BLAKE2b and BLAKE2s hash functions (RFC 7693)
+ * Copyright (C) 2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 BLAKE2 reference implementation
+ * by Samuel Neves, at https://github.com/BLAKE2/BLAKE2/tree/master/ref
+ * Copyright 2012, Samuel Neves <sneves@dei.uc.pt>
+ */
+
+#include <config.h>
+#include <string.h>
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+/* USE_AVX indicates whether to compile with Intel AVX code. */
+#undef USE_AVX
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX 1
+#endif
+
+/* USE_AVX2 indicates whether to compile with Intel AVX2 code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX2 1
+#endif
+
+/* AMD64 assembly implementations use the SystemV ABI; on Win64 an ABI
+ * conversion and additional stack space to store XMM6-XMM15 are needed. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_AVX2) && defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
+
+#define BLAKE2B_BLOCKBYTES 128
+#define BLAKE2B_OUTBYTES 64
+#define BLAKE2B_KEYBYTES 64
+
+#define BLAKE2S_BLOCKBYTES 64
+#define BLAKE2S_OUTBYTES 32
+#define BLAKE2S_KEYBYTES 32
+
+typedef struct
+{
+ u64 h[8];
+ u64 t[2];
+ u64 f[2];
+} BLAKE2B_STATE;
+
+struct blake2b_param_s
+{
+ byte digest_length;
+ byte key_length;
+ byte fanout;
+ byte depth;
+ byte leaf_length[4];
+ byte node_offset[4];
+ byte xof_length[4];
+ byte node_depth;
+ byte inner_length;
+ byte reserved[14];
+ byte salt[16];
+ byte personal[16];
+};
+
+typedef struct BLAKE2B_CONTEXT_S
+{
+ BLAKE2B_STATE state;
+ byte buf[BLAKE2B_BLOCKBYTES];
+ size_t buflen;
+ size_t outlen;
+#ifdef USE_AVX2
+ unsigned int use_avx2:1;
+#endif
+} BLAKE2B_CONTEXT;
+
+typedef struct
+{
+ u32 h[8];
+ u32 t[2];
+ u32 f[2];
+} BLAKE2S_STATE;
+
+struct blake2s_param_s
+{
+ byte digest_length;
+ byte key_length;
+ byte fanout;
+ byte depth;
+ byte leaf_length[4];
+ byte node_offset[4];
+ byte xof_length[2];
+ byte node_depth;
+ byte inner_length;
+ /* byte reserved[0]; */
+ byte salt[8];
+ byte personal[8];
+};
+
+typedef struct BLAKE2S_CONTEXT_S
+{
+ BLAKE2S_STATE state;
+ byte buf[BLAKE2S_BLOCKBYTES];
+ size_t buflen;
+ size_t outlen;
+#ifdef USE_AVX
+ unsigned int use_avx:1;
+#endif
+} BLAKE2S_CONTEXT;
+
+typedef unsigned int (*blake2_transform_t)(void *S, const void *inblk,
+ size_t nblks);
+
+
+static const u64 blake2b_IV[8] =
+{
+ U64_C(0x6a09e667f3bcc908), U64_C(0xbb67ae8584caa73b),
+ U64_C(0x3c6ef372fe94f82b), U64_C(0xa54ff53a5f1d36f1),
+ U64_C(0x510e527fade682d1), U64_C(0x9b05688c2b3e6c1f),
+ U64_C(0x1f83d9abfb41bd6b), U64_C(0x5be0cd19137e2179)
+};
+
+static const u32 blake2s_IV[8] =
+{
+ 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
+ 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
+};
+
+static byte zero_block[BLAKE2B_BLOCKBYTES] = { 0, };
+
+
+static void blake2_write(void *S, const void *inbuf, size_t inlen,
+ byte *tmpbuf, size_t *tmpbuflen, size_t blkbytes,
+ blake2_transform_t transform_fn)
+{
+ const byte* in = inbuf;
+ unsigned int burn = 0;
+
+ if (inlen > 0)
+ {
+ size_t left = *tmpbuflen;
+ size_t fill = blkbytes - left;
+ size_t nblks;
+
+ if (inlen > fill)
+ {
+ if (fill > 0)
+ buf_cpy (tmpbuf + left, in, fill); /* Fill buffer */
+ left = 0;
+
+ burn = transform_fn (S, tmpbuf, 1); /* Increment counter + Compress */
+
+ in += fill;
+ inlen -= fill;
+
+ nblks = inlen / blkbytes - !(inlen % blkbytes);
+ if (nblks)
+ {
+ burn = transform_fn(S, in, nblks);
+ in += blkbytes * nblks;
+ inlen -= blkbytes * nblks;
+ }
+ }
+
+ gcry_assert (inlen > 0);
+
+ buf_cpy (tmpbuf + left, in, inlen);
+ *tmpbuflen = left + inlen;
+ }
+
+ if (burn)
+ _gcry_burn_stack (burn);
+
+ return;
+}
+
+
+static inline void blake2b_set_lastblock(BLAKE2B_STATE *S)
+{
+ S->f[0] = U64_C(0xffffffffffffffff);
+}
+
+static inline int blake2b_is_lastblock(const BLAKE2B_STATE *S)
+{
+ return S->f[0] != 0;
+}
+
+static inline void blake2b_increment_counter(BLAKE2B_STATE *S, const int inc)
+{
+ S->t[0] += (u64)inc;
+ S->t[1] += (S->t[0] < (u64)inc) - (inc < 0);
+}
+
+static inline u64 rotr64(u64 x, u64 n)
+{
+ return ((x >> (n & 63)) | (x << ((64 - n) & 63)));
+}
+
+static unsigned int blake2b_transform_generic(BLAKE2B_STATE *S,
+ const void *inblks,
+ size_t nblks)
+{
+ static const byte blake2b_sigma[12][16] =
+ {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
+ };
+ const byte* in = inblks;
+ u64 m[16];
+ u64 v[16];
+
+ while (nblks--)
+ {
+ /* Increment counter */
+ blake2b_increment_counter (S, BLAKE2B_BLOCKBYTES);
+
+ /* Compress */
+ m[0] = buf_get_le64 (in + 0 * sizeof(m[0]));
+ m[1] = buf_get_le64 (in + 1 * sizeof(m[0]));
+ m[2] = buf_get_le64 (in + 2 * sizeof(m[0]));
+ m[3] = buf_get_le64 (in + 3 * sizeof(m[0]));
+ m[4] = buf_get_le64 (in + 4 * sizeof(m[0]));
+ m[5] = buf_get_le64 (in + 5 * sizeof(m[0]));
+ m[6] = buf_get_le64 (in + 6 * sizeof(m[0]));
+ m[7] = buf_get_le64 (in + 7 * sizeof(m[0]));
+ m[8] = buf_get_le64 (in + 8 * sizeof(m[0]));
+ m[9] = buf_get_le64 (in + 9 * sizeof(m[0]));
+ m[10] = buf_get_le64 (in + 10 * sizeof(m[0]));
+ m[11] = buf_get_le64 (in + 11 * sizeof(m[0]));
+ m[12] = buf_get_le64 (in + 12 * sizeof(m[0]));
+ m[13] = buf_get_le64 (in + 13 * sizeof(m[0]));
+ m[14] = buf_get_le64 (in + 14 * sizeof(m[0]));
+ m[15] = buf_get_le64 (in + 15 * sizeof(m[0]));
+
+ v[ 0] = S->h[0];
+ v[ 1] = S->h[1];
+ v[ 2] = S->h[2];
+ v[ 3] = S->h[3];
+ v[ 4] = S->h[4];
+ v[ 5] = S->h[5];
+ v[ 6] = S->h[6];
+ v[ 7] = S->h[7];
+ v[ 8] = blake2b_IV[0];
+ v[ 9] = blake2b_IV[1];
+ v[10] = blake2b_IV[2];
+ v[11] = blake2b_IV[3];
+ v[12] = blake2b_IV[4] ^ S->t[0];
+ v[13] = blake2b_IV[5] ^ S->t[1];
+ v[14] = blake2b_IV[6] ^ S->f[0];
+ v[15] = blake2b_IV[7] ^ S->f[1];
+
+#define G(r,i,a,b,c,d) \
+ do { \
+ a = a + b + m[blake2b_sigma[r][2*i+0]]; \
+ d = rotr64(d ^ a, 32); \
+ c = c + d; \
+ b = rotr64(b ^ c, 24); \
+ a = a + b + m[blake2b_sigma[r][2*i+1]]; \
+ d = rotr64(d ^ a, 16); \
+ c = c + d; \
+ b = rotr64(b ^ c, 63); \
+ } while(0)
+
+#define ROUND(r) \
+ do { \
+ G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+ G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+ G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+ G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+ G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+ G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+ G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+ G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
+ } while(0)
+
+ ROUND(0);
+ ROUND(1);
+ ROUND(2);
+ ROUND(3);
+ ROUND(4);
+ ROUND(5);
+ ROUND(6);
+ ROUND(7);
+ ROUND(8);
+ ROUND(9);
+ ROUND(10);
+ ROUND(11);
+
+#undef G
+#undef ROUND
+
+ S->h[0] = S->h[0] ^ v[0] ^ v[0 + 8];
+ S->h[1] = S->h[1] ^ v[1] ^ v[1 + 8];
+ S->h[2] = S->h[2] ^ v[2] ^ v[2 + 8];
+ S->h[3] = S->h[3] ^ v[3] ^ v[3 + 8];
+ S->h[4] = S->h[4] ^ v[4] ^ v[4 + 8];
+ S->h[5] = S->h[5] ^ v[5] ^ v[5 + 8];
+ S->h[6] = S->h[6] ^ v[6] ^ v[6 + 8];
+ S->h[7] = S->h[7] ^ v[7] ^ v[7 + 8];
+
+ in += BLAKE2B_BLOCKBYTES;
+ }
+
+ return sizeof(void *) * 4 + sizeof(u64) * 16 * 2;
+}
+
+#ifdef USE_AVX2
+unsigned int _gcry_blake2b_transform_amd64_avx2(BLAKE2B_STATE *S,
+ const void *inblks,
+ size_t nblks) ASM_FUNC_ABI;
+#endif
+
+static unsigned int blake2b_transform(void *ctx, const void *inblks,
+ size_t nblks)
+{
+ BLAKE2B_CONTEXT *c = ctx;
+ unsigned int nburn;
+
+ if (0)
+ {}
+#ifdef USE_AVX2
+ else if (c->use_avx2)
+ nburn = _gcry_blake2b_transform_amd64_avx2(&c->state, inblks, nblks);
+#endif
+ else
+ nburn = blake2b_transform_generic(&c->state, inblks, nblks);
+
+ if (nburn)
+ nburn += ASM_EXTRA_STACK;
+
+ return nburn;
+}
+
+static void blake2b_final(void *ctx)
+{
+ BLAKE2B_CONTEXT *c = ctx;
+ BLAKE2B_STATE *S = &c->state;
+ unsigned int burn;
+ size_t i;
+
+ gcry_assert (sizeof(c->buf) >= c->outlen);
+ if (blake2b_is_lastblock(S))
+ return;
+
+ if (c->buflen < BLAKE2B_BLOCKBYTES)
+ memset (c->buf + c->buflen, 0, BLAKE2B_BLOCKBYTES - c->buflen); /* Padding */
+ blake2b_set_lastblock (S);
+ blake2b_increment_counter (S, (int)c->buflen - BLAKE2B_BLOCKBYTES);
+ burn = blake2b_transform (ctx, c->buf, 1);
+
+ /* Output full hash to buffer */
+ for (i = 0; i < 8; ++i)
+ buf_put_le64 (c->buf + sizeof(S->h[i]) * i, S->h[i]);
+
+ /* Zero out extra buffer bytes. */
+ if (c->outlen < sizeof(c->buf))
+ memset (c->buf + c->outlen, 0, sizeof(c->buf) - c->outlen);
+
+ if (burn)
+ _gcry_burn_stack (burn);
+}
+
+static byte *blake2b_read(void *ctx)
+{
+ BLAKE2B_CONTEXT *c = ctx;
+ return c->buf;
+}
+
+static void blake2b_write(void *ctx, const void *inbuf, size_t inlen)
+{
+ BLAKE2B_CONTEXT *c = ctx;
+ BLAKE2B_STATE *S = &c->state;
+ blake2_write(S, inbuf, inlen, c->buf, &c->buflen, BLAKE2B_BLOCKBYTES,
+ blake2b_transform);
+}
+
+static inline void blake2b_init_param(BLAKE2B_STATE *S,
+ const struct blake2b_param_s *P)
+{
+ const byte *p = (const byte *)P;
+ size_t i;
+
+ /* init xors IV with input parameter block */
+
+ /* IV XOR ParamBlock */
+ for (i = 0; i < 8; ++i)
+ S->h[i] = blake2b_IV[i] ^ buf_get_le64(p + sizeof(S->h[i]) * i);
+}
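
Because blake2b_init below leaves everything past the first four bytes of the parameter block at zero, only the first little-endian word differs from the raw IV. A small worked example under the usual unkeyed BLAKE2b-512 parameters (digest_length 64, fanout 1, depth 1):

  #include <stdint.h>
  #include <stdio.h>

  int main (void)
  {
    const uint64_t iv0 = 0x6a09e667f3bcc908ULL;       /* blake2b_IV[0] */
    unsigned digest_length = 64, key_length = 0, fanout = 1, depth = 1;
    uint64_t p0 = (uint64_t)depth << 24 | (uint64_t)fanout << 16
                  | (uint64_t)key_length << 8 | digest_length;

    /* Should print 6a09e667f2bdc948, the familiar unkeyed BLAKE2b-512 h[0]. */
    printf ("h[0] = %016llx\n", (unsigned long long)(iv0 ^ p0));
    return 0;
  }
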
+
+static inline gcry_err_code_t blake2b_init(BLAKE2B_CONTEXT *ctx,
+ const byte *key, size_t keylen)
+{
+ struct blake2b_param_s P[1] = { { 0, } };
+ BLAKE2B_STATE *S = &ctx->state;
+
+ if (!ctx->outlen || ctx->outlen > BLAKE2B_OUTBYTES)
+ return GPG_ERR_INV_ARG;
+ if (sizeof(P[0]) != sizeof(u64) * 8)
+ return GPG_ERR_INTERNAL;
+ if (keylen && (!key || keylen > BLAKE2B_KEYBYTES))
+ return GPG_ERR_INV_KEYLEN;
+
+ P->digest_length = ctx->outlen;
+ P->key_length = keylen;
+ P->fanout = 1;
+ P->depth = 1;
+
+ blake2b_init_param (S, P);
+ wipememory (P, sizeof(P));
+
+ if (key)
+ {
+ blake2b_write (ctx, key, keylen);
+ blake2b_write (ctx, zero_block, BLAKE2B_BLOCKBYTES - keylen);
+ }
+
+ return 0;
+}
+
+static gcry_err_code_t blake2b_init_ctx(void *ctx, unsigned int flags,
+ const byte *key, size_t keylen,
+ unsigned int dbits)
+{
+ BLAKE2B_CONTEXT *c = ctx;
+ unsigned int features = _gcry_get_hw_features ();
+
+ (void)features;
+ (void)flags;
+
+ memset (c, 0, sizeof (*c));
+
+#ifdef USE_AVX2
+ c->use_avx2 = !!(features & HWF_INTEL_AVX2);
+#endif
+
+ c->outlen = dbits / 8;
+ c->buflen = 0;
+ return blake2b_init(c, key, keylen);
+}
+
+static inline void blake2s_set_lastblock(BLAKE2S_STATE *S)
+{
+ S->f[0] = 0xFFFFFFFFUL;
+}
+
+static inline int blake2s_is_lastblock(BLAKE2S_STATE *S)
+{
+ return S->f[0] != 0;
+}
+
+static inline void blake2s_increment_counter(BLAKE2S_STATE *S, const int inc)
+{
+ S->t[0] += (u32)inc;
+ S->t[1] += (S->t[0] < (u32)inc) - (inc < 0);
+}
+
+static unsigned int blake2s_transform_generic(BLAKE2S_STATE *S,
+ const void *inblks,
+ size_t nblks)
+{
+ static const byte blake2s_sigma[10][16] =
+ {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
+ };
+ unsigned int burn = 0;
+ const byte* in = inblks;
+ u32 m[16];
+ u32 v[16];
+
+ while (nblks--)
+ {
+ /* Increment counter */
+ blake2s_increment_counter (S, BLAKE2S_BLOCKBYTES);
+
+ /* Compress */
+ m[0] = buf_get_le32 (in + 0 * sizeof(m[0]));
+ m[1] = buf_get_le32 (in + 1 * sizeof(m[0]));
+ m[2] = buf_get_le32 (in + 2 * sizeof(m[0]));
+ m[3] = buf_get_le32 (in + 3 * sizeof(m[0]));
+ m[4] = buf_get_le32 (in + 4 * sizeof(m[0]));
+ m[5] = buf_get_le32 (in + 5 * sizeof(m[0]));
+ m[6] = buf_get_le32 (in + 6 * sizeof(m[0]));
+ m[7] = buf_get_le32 (in + 7 * sizeof(m[0]));
+ m[8] = buf_get_le32 (in + 8 * sizeof(m[0]));
+ m[9] = buf_get_le32 (in + 9 * sizeof(m[0]));
+ m[10] = buf_get_le32 (in + 10 * sizeof(m[0]));
+ m[11] = buf_get_le32 (in + 11 * sizeof(m[0]));
+ m[12] = buf_get_le32 (in + 12 * sizeof(m[0]));
+ m[13] = buf_get_le32 (in + 13 * sizeof(m[0]));
+ m[14] = buf_get_le32 (in + 14 * sizeof(m[0]));
+ m[15] = buf_get_le32 (in + 15 * sizeof(m[0]));
+
+ v[ 0] = S->h[0];
+ v[ 1] = S->h[1];
+ v[ 2] = S->h[2];
+ v[ 3] = S->h[3];
+ v[ 4] = S->h[4];
+ v[ 5] = S->h[5];
+ v[ 6] = S->h[6];
+ v[ 7] = S->h[7];
+ v[ 8] = blake2s_IV[0];
+ v[ 9] = blake2s_IV[1];
+ v[10] = blake2s_IV[2];
+ v[11] = blake2s_IV[3];
+ v[12] = S->t[0] ^ blake2s_IV[4];
+ v[13] = S->t[1] ^ blake2s_IV[5];
+ v[14] = S->f[0] ^ blake2s_IV[6];
+ v[15] = S->f[1] ^ blake2s_IV[7];
+
+#define G(r,i,a,b,c,d) \
+ do { \
+ a = a + b + m[blake2s_sigma[r][2*i+0]]; \
+ d = ror(d ^ a, 16); \
+ c = c + d; \
+ b = ror(b ^ c, 12); \
+ a = a + b + m[blake2s_sigma[r][2*i+1]]; \
+ d = ror(d ^ a, 8); \
+ c = c + d; \
+ b = ror(b ^ c, 7); \
+ } while(0)
+
+#define ROUND(r) \
+ do { \
+ G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+ G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+ G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+ G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+ G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+ G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+ G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+ G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
+ } while(0)
+
+ ROUND(0);
+ ROUND(1);
+ ROUND(2);
+ ROUND(3);
+ ROUND(4);
+ ROUND(5);
+ ROUND(6);
+ ROUND(7);
+ ROUND(8);
+ ROUND(9);
+
+#undef G
+#undef ROUND
+
+ S->h[0] = S->h[0] ^ v[0] ^ v[0 + 8];
+ S->h[1] = S->h[1] ^ v[1] ^ v[1 + 8];
+ S->h[2] = S->h[2] ^ v[2] ^ v[2 + 8];
+ S->h[3] = S->h[3] ^ v[3] ^ v[3 + 8];
+ S->h[4] = S->h[4] ^ v[4] ^ v[4 + 8];
+ S->h[5] = S->h[5] ^ v[5] ^ v[5 + 8];
+ S->h[6] = S->h[6] ^ v[6] ^ v[6 + 8];
+ S->h[7] = S->h[7] ^ v[7] ^ v[7 + 8];
+
+ in += BLAKE2S_BLOCKBYTES;
+ }
+
+ return burn;
+}
+
+#ifdef USE_AVX
+unsigned int _gcry_blake2s_transform_amd64_avx(BLAKE2S_STATE *S,
+ const void *inblks,
+ size_t nblks) ASM_FUNC_ABI;
+#endif
+
+static unsigned int blake2s_transform(void *ctx, const void *inblks,
+ size_t nblks)
+{
+ BLAKE2S_CONTEXT *c = ctx;
+ unsigned int nburn;
+
+ if (0)
+ {}
+#ifdef USE_AVX
+ else if (c->use_avx)
+ nburn = _gcry_blake2s_transform_amd64_avx(&c->state, inblks, nblks);
+#endif
+ else
+ nburn = blake2s_transform_generic(&c->state, inblks, nblks);
+
+ if (nburn)
+ nburn += ASM_EXTRA_STACK;
+
+ return nburn;
+}
+
+static void blake2s_final(void *ctx)
+{
+ BLAKE2S_CONTEXT *c = ctx;
+ BLAKE2S_STATE *S = &c->state;
+ unsigned int burn;
+ size_t i;
+
+ gcry_assert (sizeof(c->buf) >= c->outlen);
+ if (blake2s_is_lastblock(S))
+ return;
+
+ if (c->buflen < BLAKE2S_BLOCKBYTES)
+ memset (c->buf + c->buflen, 0, BLAKE2S_BLOCKBYTES - c->buflen); /* Padding */
+ blake2s_set_lastblock (S);
+ blake2s_increment_counter (S, (int)c->buflen - BLAKE2S_BLOCKBYTES);
+ burn = blake2s_transform (ctx, c->buf, 1);
+
+ /* Output full hash to buffer */
+ for (i = 0; i < 8; ++i)
+ buf_put_le32 (c->buf + sizeof(S->h[i]) * i, S->h[i]);
+
+ /* Zero out extra buffer bytes. */
+ if (c->outlen < sizeof(c->buf))
+ memset (c->buf + c->outlen, 0, sizeof(c->buf) - c->outlen);
+
+ if (burn)
+ _gcry_burn_stack (burn);
+}
+
+static byte *blake2s_read(void *ctx)
+{
+ BLAKE2S_CONTEXT *c = ctx;
+ return c->buf;
+}
+
+static void blake2s_write(void *ctx, const void *inbuf, size_t inlen)
+{
+ BLAKE2S_CONTEXT *c = ctx;
+ BLAKE2S_STATE *S = &c->state;
+ blake2_write(S, inbuf, inlen, c->buf, &c->buflen, BLAKE2S_BLOCKBYTES,
+ blake2s_transform);
+}
+
+static inline void blake2s_init_param(BLAKE2S_STATE *S,
+ const struct blake2s_param_s *P)
+{
+ const byte *p = (const byte *)P;
+ size_t i;
+
+ /* init2 xors IV with input parameter block */
+
+ /* IV XOR ParamBlock */
+ for (i = 0; i < 8; ++i)
+ S->h[i] ^= blake2s_IV[i] ^ buf_get_le32(&p[i * 4]);
+}
+
+static inline gcry_err_code_t blake2s_init(BLAKE2S_CONTEXT *ctx,
+ const byte *key, size_t keylen)
+{
+ struct blake2s_param_s P[1] = { { 0, } };
+ BLAKE2S_STATE *S = &ctx->state;
+
+ if (!ctx->outlen || ctx->outlen > BLAKE2S_OUTBYTES)
+ return GPG_ERR_INV_ARG;
+ if (sizeof(P[0]) != sizeof(u32) * 8)
+ return GPG_ERR_INTERNAL;
+ if (keylen && (!key || keylen > BLAKE2S_KEYBYTES))
+ return GPG_ERR_INV_KEYLEN;
+
+ P->digest_length = ctx->outlen;
+ P->key_length = keylen;
+ P->fanout = 1;
+ P->depth = 1;
+
+ blake2s_init_param (S, P);
+ wipememory (P, sizeof(P));
+
+ if (key)
+ {
+ blake2s_write (ctx, key, keylen);
+ blake2s_write (ctx, zero_block, BLAKE2S_BLOCKBYTES - keylen);
+ }
+
+ return 0;
+}
+
+static gcry_err_code_t blake2s_init_ctx(void *ctx, unsigned int flags,
+ const byte *key, size_t keylen,
+ unsigned int dbits)
+{
+ BLAKE2S_CONTEXT *c = ctx;
+ unsigned int features = _gcry_get_hw_features ();
+
+ (void)features;
+ (void)flags;
+
+ memset (c, 0, sizeof (*c));
+
+#ifdef USE_AVX
+ c->use_avx = !!(features & HWF_INTEL_AVX);
+#endif
+
+ c->outlen = dbits / 8;
+ c->buflen = 0;
+ return blake2s_init(c, key, keylen);
+}
+
+/* Selftests from "RFC 7693, Appendix E. BLAKE2b and BLAKE2s Self-Test
+ * Module C Source". */
+static void selftest_seq(byte *out, size_t len, u32 seed)
+{
+ size_t i;
+ u32 t, a, b;
+
+ a = 0xDEAD4BAD * seed;
+ b = 1;
+
+ for (i = 0; i < len; i++)
+ {
+ t = a + b;
+ a = b;
+ b = t;
+ out[i] = (t >> 24) & 0xFF;
+ }
+}
+
+static gpg_err_code_t
+selftests_blake2b (int algo, int extended, selftest_report_func_t report)
+{
+ static const byte blake2b_res[32] =
+ {
+ 0xC2, 0x3A, 0x78, 0x00, 0xD9, 0x81, 0x23, 0xBD,
+ 0x10, 0xF5, 0x06, 0xC6, 0x1E, 0x29, 0xDA, 0x56,
+ 0x03, 0xD7, 0x63, 0xB8, 0xBB, 0xAD, 0x2E, 0x73,
+ 0x7F, 0x5E, 0x76, 0x5A, 0x7B, 0xCC, 0xD4, 0x75
+ };
+ static const size_t b2b_md_len[4] = { 20, 32, 48, 64 };
+ static const size_t b2b_in_len[6] = { 0, 3, 128, 129, 255, 1024 };
+ size_t i, j, outlen, inlen;
+ byte in[1024], key[64];
+ BLAKE2B_CONTEXT ctx;
+ BLAKE2B_CONTEXT ctx2;
+ const char *what;
+ const char *errtxt;
+
+ (void)extended;
+
+ what = "rfc7693 BLAKE2b selftest";
+
+ /* 256-bit hash for testing */
+ if (blake2b_init_ctx(&ctx, 0, NULL, 0, 32 * 8))
+ {
+ errtxt = "init failed";
+ goto failed;
+ }
+
+ for (i = 0; i < 4; i++)
+ {
+ outlen = b2b_md_len[i];
+ for (j = 0; j < 6; j++)
+ {
+ inlen = b2b_in_len[j];
+
+ selftest_seq(in, inlen, inlen); /* unkeyed hash */
+ blake2b_init_ctx(&ctx2, 0, NULL, 0, outlen * 8);
+ blake2b_write(&ctx2, in, inlen);
+ blake2b_final(&ctx2);
+ blake2b_write(&ctx, ctx2.buf, outlen); /* hash the hash */
+
+ selftest_seq(key, outlen, outlen); /* keyed hash */
+ blake2b_init_ctx(&ctx2, 0, key, outlen, outlen * 8);
+ blake2b_write(&ctx2, in, inlen);
+ blake2b_final(&ctx2);
+ blake2b_write(&ctx, ctx2.buf, outlen); /* hash the hash */
+ }
+ }
+
+ /* compute and compare the hash of hashes */
+ blake2b_final(&ctx);
+ for (i = 0; i < 32; i++)
+ {
+ if (ctx.buf[i] != blake2b_res[i])
+ {
+ errtxt = "digest mismatch";
+ goto failed;
+ }
+ }
+
+ return 0;
+
+failed:
+ if (report)
+ report ("digest", algo, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+static gpg_err_code_t
+selftests_blake2s (int algo, int extended, selftest_report_func_t report)
+{
+ static const byte blake2s_res[32] =
+ {
+ 0x6A, 0x41, 0x1F, 0x08, 0xCE, 0x25, 0xAD, 0xCD,
+ 0xFB, 0x02, 0xAB, 0xA6, 0x41, 0x45, 0x1C, 0xEC,
+ 0x53, 0xC5, 0x98, 0xB2, 0x4F, 0x4F, 0xC7, 0x87,
+ 0xFB, 0xDC, 0x88, 0x79, 0x7F, 0x4C, 0x1D, 0xFE
+ };
+ static const size_t b2s_md_len[4] = { 16, 20, 28, 32 };
+ static const size_t b2s_in_len[6] = { 0, 3, 64, 65, 255, 1024 };
+ size_t i, j, outlen, inlen;
+ byte in[1024], key[32];
+ BLAKE2S_CONTEXT ctx;
+ BLAKE2S_CONTEXT ctx2;
+ const char *what;
+ const char *errtxt;
+
+ (void)extended;
+
+ what = "rfc7693 BLAKE2s selftest";
+
+ /* 256-bit hash for testing */
+ if (blake2s_init_ctx(&ctx, 0, NULL, 0, 32 * 8))
+ {
+ errtxt = "init failed";
+ goto failed;
+ }
+
+ for (i = 0; i < 4; i++)
+ {
+ outlen = b2s_md_len[i];
+ for (j = 0; j < 6; j++)
+ {
+ inlen = b2s_in_len[j];
+
+ selftest_seq(in, inlen, inlen); /* unkeyed hash */
+ blake2s_init_ctx(&ctx2, 0, NULL, 0, outlen * 8);
+ blake2s_write(&ctx2, in, inlen);
+ blake2s_final(&ctx2);
+ blake2s_write(&ctx, ctx2.buf, outlen); /* hash the hash */
+
+ selftest_seq(key, outlen, outlen); /* keyed hash */
+ blake2s_init_ctx(&ctx2, 0, key, outlen, outlen * 8);
+ blake2s_write(&ctx2, in, inlen);
+ blake2s_final(&ctx2);
+ blake2s_write(&ctx, ctx2.buf, outlen); /* hash the hash */
+ }
+ }
+
+ /* compute and compare the hash of hashes */
+ blake2s_final(&ctx);
+ for (i = 0; i < 32; i++)
+ {
+ if (ctx.buf[i] != blake2s_res[i])
+ {
+ errtxt = "digest mismatch";
+ goto failed;
+ }
+ }
+
+ return 0;
+
+failed:
+ if (report)
+ report ("digest", algo, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+gcry_err_code_t _gcry_blake2_init_with_key(void *ctx, unsigned int flags,
+ const unsigned char *key,
+ size_t keylen, int algo)
+{
+ gcry_err_code_t rc;
+ switch (algo)
+ {
+ case GCRY_MD_BLAKE2B_512:
+ rc = blake2b_init_ctx (ctx, flags, key, keylen, 512);
+ break;
+ case GCRY_MD_BLAKE2B_384:
+ rc = blake2b_init_ctx (ctx, flags, key, keylen, 384);
+ break;
+ case GCRY_MD_BLAKE2B_256:
+ rc = blake2b_init_ctx (ctx, flags, key, keylen, 256);
+ break;
+ case GCRY_MD_BLAKE2B_160:
+ rc = blake2b_init_ctx (ctx, flags, key, keylen, 160);
+ break;
+ case GCRY_MD_BLAKE2S_256:
+ rc = blake2s_init_ctx (ctx, flags, key, keylen, 256);
+ break;
+ case GCRY_MD_BLAKE2S_224:
+ rc = blake2s_init_ctx (ctx, flags, key, keylen, 224);
+ break;
+ case GCRY_MD_BLAKE2S_160:
+ rc = blake2s_init_ctx (ctx, flags, key, keylen, 160);
+ break;
+ case GCRY_MD_BLAKE2S_128:
+ rc = blake2s_init_ctx (ctx, flags, key, keylen, 128);
+ break;
+ default:
+ rc = GPG_ERR_DIGEST_ALGO;
+ break;
+ }
+
+ return rc;
+}
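
This is the entry point that makes keyed BLAKE2 reachable from the generic md layer. A sketch of keyed BLAKE2b-512 through the public libgcrypt API, with error handling trimmed, placeholder key/message data, and assuming gcry_md_setkey routes to the native keyed init for BLAKE2 algorithms:

  #include <gcrypt.h>
  #include <stdio.h>
  #include <string.h>

  int main (void)
  {
    gcry_md_hd_t hd;
    static const char key[] = "an arbitrary key";     /* <= 64 bytes for BLAKE2b */
    static const char msg[] = "hello";
    unsigned char *digest;
    unsigned int dlen = gcry_md_get_algo_dlen (GCRY_MD_BLAKE2B_512);

    gcry_check_version (NULL);
    if (gcry_md_open (&hd, GCRY_MD_BLAKE2B_512, 0))
      return 1;
    if (gcry_md_setkey (hd, key, strlen (key)))       /* keyed BLAKE2, no HMAC flag */
      return 1;
    gcry_md_write (hd, msg, strlen (msg));
    digest = gcry_md_read (hd, GCRY_MD_BLAKE2B_512);
    for (unsigned int i = 0; i < dlen; i++)
      printf ("%02x", digest[i]);
    putchar ('\n');
    gcry_md_close (hd);
    return 0;
  }
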
+
+
+#define DEFINE_BLAKE2_VARIANT(bs, BS, dbits, oid_branch) \
+ static void blake2##bs##_##dbits##_init(void *ctx, unsigned int flags) \
+ { \
+ int err = blake2##bs##_init_ctx (ctx, flags, NULL, 0, dbits); \
+ gcry_assert (err == 0); \
+ } \
+ static void \
+ _gcry_blake2##bs##_##dbits##_hash_buffer(void *outbuf, \
+ const void *buffer, size_t length) \
+ { \
+ BLAKE2##BS##_CONTEXT hd; \
+ blake2##bs##_##dbits##_init (&hd, 0); \
+ blake2##bs##_write (&hd, buffer, length); \
+ blake2##bs##_final (&hd); \
+ memcpy (outbuf, blake2##bs##_read (&hd), dbits / 8); \
+ } \
+ static void \
+ _gcry_blake2##bs##_##dbits##_hash_buffers(void *outbuf, \
+ const gcry_buffer_t *iov, int iovcnt) \
+ { \
+ BLAKE2##BS##_CONTEXT hd; \
+ blake2##bs##_##dbits##_init (&hd, 0); \
+ for (;iovcnt > 0; iov++, iovcnt--) \
+ blake2##bs##_write (&hd, (const char*)iov[0].data + iov[0].off, \
+ iov[0].len); \
+ blake2##bs##_final (&hd); \
+ memcpy (outbuf, blake2##bs##_read (&hd), dbits / 8); \
+ } \
+ static byte blake2##bs##_##dbits##_asn[] = { 0x30 }; \
+ static gcry_md_oid_spec_t oid_spec_blake2##bs##_##dbits[] = \
+ { \
+ { " 1.3.6.1.4.1.1722.12.2." oid_branch }, \
+ { NULL } \
+ }; \
+ gcry_md_spec_t _gcry_digest_spec_blake2##bs##_##dbits = \
+ { \
+ GCRY_MD_BLAKE2##BS##_##dbits, {0, 0}, \
+ "BLAKE2" #BS "_" #dbits, blake2##bs##_##dbits##_asn, \
+ DIM (blake2##bs##_##dbits##_asn), oid_spec_blake2##bs##_##dbits, \
+ dbits / 8, blake2##bs##_##dbits##_init, blake2##bs##_write, \
+ blake2##bs##_final, blake2##bs##_read, NULL, \
+ _gcry_blake2##bs##_##dbits##_hash_buffer, \
+ _gcry_blake2##bs##_##dbits##_hash_buffers, \
+ sizeof (BLAKE2##BS##_CONTEXT), selftests_blake2##bs \
+ };
+
+DEFINE_BLAKE2_VARIANT(b, B, 512, "1.16")
+DEFINE_BLAKE2_VARIANT(b, B, 384, "1.12")
+DEFINE_BLAKE2_VARIANT(b, B, 256, "1.8")
+DEFINE_BLAKE2_VARIANT(b, B, 160, "1.5")
+
+DEFINE_BLAKE2_VARIANT(s, S, 256, "2.8")
+DEFINE_BLAKE2_VARIANT(s, S, 224, "2.7")
+DEFINE_BLAKE2_VARIANT(s, S, 160, "2.5")
+DEFINE_BLAKE2_VARIANT(s, S, 128, "2.4")
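
Each expansion registers one digest spec (init, write, final, read and the two one-shot helpers) under its own OID branch; callers normally never see that machinery. A one-shot sketch through the public front end, assuming a standard libgcrypt build (the call should end up in the generated _gcry_blake2b_512_hash_buffer):

  #include <gcrypt.h>
  #include <stdio.h>

  int main (void)
  {
    unsigned char out[64];                            /* BLAKE2b-512 digest length */

    gcry_check_version (NULL);
    gcry_md_hash_buffer (GCRY_MD_BLAKE2B_512, out, "abc", 3);
    for (int i = 0; i < 64; i++)
      printf ("%02x", out[i]);
    putchar ('\n');
    return 0;
  }
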
diff --git a/comm/third_party/libgcrypt/cipher/blake2b-amd64-avx2.S b/comm/third_party/libgcrypt/cipher/blake2b-amd64-avx2.S
new file mode 100644
index 0000000000..357e8a5167
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/blake2b-amd64-avx2.S
@@ -0,0 +1,300 @@
+/* blake2b-amd64-avx2.S - AVX2 implementation of BLAKE2b
+ *
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 BLAKE2 reference implementation
+ * by Samuel Neves, at https://github.com/BLAKE2/BLAKE2/tree/master/sse
+ * Copyright 2012, Samuel Neves <sneves@dei.uc.pt>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* register macros */
+#define RSTATE %rdi
+#define RINBLKS %rsi
+#define RNBLKS %rdx
+#define RIV %rcx
+
+/* state structure */
+#define STATE_H 0
+#define STATE_T (STATE_H + 8 * 8)
+#define STATE_F (STATE_T + 2 * 8)
+
+/* vector registers */
+#define ROW1 %ymm0
+#define ROW2 %ymm1
+#define ROW3 %ymm2
+#define ROW4 %ymm3
+#define TMP1 %ymm4
+#define TMP1x %xmm4
+#define R16 %ymm5
+#define R24 %ymm6
+
+#define MA1 %ymm8
+#define MA2 %ymm9
+#define MA3 %ymm10
+#define MA4 %ymm11
+#define MA1x %xmm8
+#define MA2x %xmm9
+#define MA3x %xmm10
+#define MA4x %xmm11
+
+#define MB1 %ymm12
+#define MB2 %ymm13
+#define MB3 %ymm14
+#define MB4 %ymm15
+#define MB1x %xmm12
+#define MB2x %xmm13
+#define MB3x %xmm14
+#define MB4x %xmm15
+
+/**********************************************************************
+ blake2b/AVX2
+ **********************************************************************/
+
+#define GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+ s0, s1, s2, s3, s4, s5, s6, s7, s8, \
+ s9, s10, s11, s12, s13, s14, s15) \
+ vmovq (s0)*8(RINBLKS), m1x; \
+ vmovq (s4)*8(RINBLKS), TMP1x; \
+ vpinsrq $1, (s2)*8(RINBLKS), m1x, m1x; \
+ vpinsrq $1, (s6)*8(RINBLKS), TMP1x, TMP1x; \
+ vinserti128 $1, TMP1x, m1, m1; \
+ vmovq (s1)*8(RINBLKS), m2x; \
+ vmovq (s5)*8(RINBLKS), TMP1x; \
+ vpinsrq $1, (s3)*8(RINBLKS), m2x, m2x; \
+ vpinsrq $1, (s7)*8(RINBLKS), TMP1x, TMP1x; \
+ vinserti128 $1, TMP1x, m2, m2; \
+ vmovq (s8)*8(RINBLKS), m3x; \
+ vmovq (s12)*8(RINBLKS), TMP1x; \
+ vpinsrq $1, (s10)*8(RINBLKS), m3x, m3x; \
+ vpinsrq $1, (s14)*8(RINBLKS), TMP1x, TMP1x; \
+ vinserti128 $1, TMP1x, m3, m3; \
+ vmovq (s9)*8(RINBLKS), m4x; \
+ vmovq (s13)*8(RINBLKS), TMP1x; \
+ vpinsrq $1, (s11)*8(RINBLKS), m4x, m4x; \
+ vpinsrq $1, (s15)*8(RINBLKS), TMP1x, TMP1x; \
+ vinserti128 $1, TMP1x, m4, m4;
+
+#define LOAD_MSG_0(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+ GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
+#define LOAD_MSG_1(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+ GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3)
+#define LOAD_MSG_2(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+ GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4)
+#define LOAD_MSG_3(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+ GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8)
+#define LOAD_MSG_4(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+ GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13)
+#define LOAD_MSG_5(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+ GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9)
+#define LOAD_MSG_6(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+ GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11)
+#define LOAD_MSG_7(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+ GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10)
+#define LOAD_MSG_8(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+ GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5)
+#define LOAD_MSG_9(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+ GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0)
+#define LOAD_MSG_10(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+ LOAD_MSG_0(m1, m2, m3, m4, m1x, m2x, m3x, m4x)
+#define LOAD_MSG_11(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+ LOAD_MSG_1(m1, m2, m3, m4, m1x, m2x, m3x, m4x)
+
+#define LOAD_MSG(r, m1, m2, m3, m4) \
+ LOAD_MSG_##r(m1, m2, m3, m4, m1##x, m2##x, m3##x, m4##x)
+
+#define ROR_32(in, out) vpshufd $0xb1, in, out;
+
+#define ROR_24(in, out) vpshufb R24, in, out;
+
+#define ROR_16(in, out) vpshufb R16, in, out;
+
+#define ROR_63(in, out) \
+ vpsrlq $63, in, TMP1; \
+ vpaddq in, in, out; \
+ vpxor TMP1, out, out;
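
ROR_63 relies on the identity rotr64(x, 63) == ((x + x) ^ (x >> 63)): vpaddq doubles each lane, which is a left shift by one modulo 2^64, and the xor re-inserts the bit that fell off the top. A throwaway scalar check of the identity:

  #include <assert.h>
  #include <stdint.h>

  static uint64_t rotr63 (uint64_t x) { return (x >> 63) | (x << 1); }

  int main (void)
  {
    uint64_t xs[] = { 0, 1, 0x8000000000000000ULL, 0xdeadbeefcafebabeULL, UINT64_MAX };

    for (unsigned i = 0; i < sizeof xs / sizeof *xs; i++)
      assert (rotr63 (xs[i]) == ((xs[i] + xs[i]) ^ (xs[i] >> 63)));
    return 0;
  }
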
+
+#define G(r1, r2, r3, r4, m, ROR_A, ROR_B) \
+ vpaddq m, r1, r1; \
+ vpaddq r2, r1, r1; \
+ vpxor r1, r4, r4; \
+ ROR_A(r4, r4); \
+ vpaddq r4, r3, r3; \
+ vpxor r3, r2, r2; \
+ ROR_B(r2, r2);
+
+#define G1(r1, r2, r3, r4, m) \
+ G(r1, r2, r3, r4, m, ROR_32, ROR_24);
+
+#define G2(r1, r2, r3, r4, m) \
+ G(r1, r2, r3, r4, m, ROR_16, ROR_63);
+
+#define MM_SHUFFLE(z,y,x,w) \
+ (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
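
MM_SHUFFLE packs four 2-bit lane selectors into the immediate byte vpermq expects; MM_SHUFFLE(0,3,2,1) is 0x39 and picks source lanes 1, 2, 3, 0, i.e. a one-lane rotation of the row. A quick check of the packing:

  #include <assert.h>

  #define MM_SHUFFLE(z,y,x,w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))

  int main (void)
  {
    assert (MM_SHUFFLE (0, 3, 2, 1) == 0x39);   /* picks lanes 1, 2, 3, 0 */
    assert (MM_SHUFFLE (1, 0, 3, 2) == 0x4e);   /* picks lanes 2, 3, 0, 1 */
    assert (MM_SHUFFLE (2, 1, 0, 3) == 0x93);   /* picks lanes 3, 0, 1, 2 */
    return 0;
  }
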
+
+#define DIAGONALIZE(r1, r2, r3, r4) \
+ vpermq $MM_SHUFFLE(0,3,2,1), r2, r2; \
+ vpermq $MM_SHUFFLE(1,0,3,2), r3, r3; \
+ vpermq $MM_SHUFFLE(2,1,0,3), r4, r4;
+
+#define UNDIAGONALIZE(r1, r2, r3, r4) \
+ vpermq $MM_SHUFFLE(2,1,0,3), r2, r2; \
+ vpermq $MM_SHUFFLE(1,0,3,2), r3, r3; \
+ vpermq $MM_SHUFFLE(0,3,2,1), r4, r4;
+
+#define ROUND(r, m1, m2, m3, m4) \
+ G1(ROW1, ROW2, ROW3, ROW4, m1); \
+ G2(ROW1, ROW2, ROW3, ROW4, m2); \
+ DIAGONALIZE(ROW1, ROW2, ROW3, ROW4); \
+ G1(ROW1, ROW2, ROW3, ROW4, m3); \
+ G2(ROW1, ROW2, ROW3, ROW4, m4); \
+ UNDIAGONALIZE(ROW1, ROW2, ROW3, ROW4);
+
+blake2b_data:
+.align 32
+.Liv:
+ .quad 0x6a09e667f3bcc908, 0xbb67ae8584caa73b
+ .quad 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1
+ .quad 0x510e527fade682d1, 0x9b05688c2b3e6c1f
+ .quad 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179
+.Lshuf_ror16:
+ .byte 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9
+.Lshuf_ror24:
+ .byte 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10
+
+.align 64
+.globl _gcry_blake2b_transform_amd64_avx2
+ELF(.type _gcry_blake2b_transform_amd64_avx2,@function;)
+
+_gcry_blake2b_transform_amd64_avx2:
+ /* input:
+ * %rdi: state
+ * %rsi: blks
+ * %rdx: num_blks
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ addq $128, (STATE_T + 0)(RSTATE);
+ adcq $0, (STATE_T + 8)(RSTATE);
+
+ vbroadcasti128 .Lshuf_ror16 rRIP, R16;
+ vbroadcasti128 .Lshuf_ror24 rRIP, R24;
+
+ vmovdqa .Liv+(0 * 8) rRIP, ROW3;
+ vmovdqa .Liv+(4 * 8) rRIP, ROW4;
+
+ vmovdqu (STATE_H + 0 * 8)(RSTATE), ROW1;
+ vmovdqu (STATE_H + 4 * 8)(RSTATE), ROW2;
+
+ vpxor (STATE_T)(RSTATE), ROW4, ROW4;
+
+ LOAD_MSG(0, MA1, MA2, MA3, MA4);
+ LOAD_MSG(1, MB1, MB2, MB3, MB4);
+
+.Loop:
+ ROUND(0, MA1, MA2, MA3, MA4);
+ LOAD_MSG(2, MA1, MA2, MA3, MA4);
+ ROUND(1, MB1, MB2, MB3, MB4);
+ LOAD_MSG(3, MB1, MB2, MB3, MB4);
+ ROUND(2, MA1, MA2, MA3, MA4);
+ LOAD_MSG(4, MA1, MA2, MA3, MA4);
+ ROUND(3, MB1, MB2, MB3, MB4);
+ LOAD_MSG(5, MB1, MB2, MB3, MB4);
+ ROUND(4, MA1, MA2, MA3, MA4);
+ LOAD_MSG(6, MA1, MA2, MA3, MA4);
+ ROUND(5, MB1, MB2, MB3, MB4);
+ LOAD_MSG(7, MB1, MB2, MB3, MB4);
+ ROUND(6, MA1, MA2, MA3, MA4);
+ LOAD_MSG(8, MA1, MA2, MA3, MA4);
+ ROUND(7, MB1, MB2, MB3, MB4);
+ LOAD_MSG(9, MB1, MB2, MB3, MB4);
+ ROUND(8, MA1, MA2, MA3, MA4);
+ LOAD_MSG(10, MA1, MA2, MA3, MA4);
+ ROUND(9, MB1, MB2, MB3, MB4);
+ LOAD_MSG(11, MB1, MB2, MB3, MB4);
+ sub $1, RNBLKS;
+ jz .Loop_end;
+
+ lea 128(RINBLKS), RINBLKS;
+ addq $128, (STATE_T + 0)(RSTATE);
+ adcq $0, (STATE_T + 8)(RSTATE);
+
+ ROUND(10, MA1, MA2, MA3, MA4);
+ LOAD_MSG(0, MA1, MA2, MA3, MA4);
+ ROUND(11, MB1, MB2, MB3, MB4);
+ LOAD_MSG(1, MB1, MB2, MB3, MB4);
+
+ vpxor ROW3, ROW1, ROW1;
+ vpxor ROW4, ROW2, ROW2;
+
+ vmovdqa .Liv+(0 * 8) rRIP, ROW3;
+ vmovdqa .Liv+(4 * 8) rRIP, ROW4;
+
+ vpxor (STATE_H + 0 * 8)(RSTATE), ROW1, ROW1;
+ vpxor (STATE_H + 4 * 8)(RSTATE), ROW2, ROW2;
+
+ vmovdqu ROW1, (STATE_H + 0 * 8)(RSTATE);
+ vmovdqu ROW2, (STATE_H + 4 * 8)(RSTATE);
+
+ vpxor (STATE_T)(RSTATE), ROW4, ROW4;
+
+ jmp .Loop;
+
+.Loop_end:
+ ROUND(10, MA1, MA2, MA3, MA4);
+ ROUND(11, MB1, MB2, MB3, MB4);
+
+ vpxor ROW3, ROW1, ROW1;
+ vpxor ROW4, ROW2, ROW2;
+ vpxor (STATE_H + 0 * 8)(RSTATE), ROW1, ROW1;
+ vpxor (STATE_H + 4 * 8)(RSTATE), ROW2, ROW2;
+
+ vmovdqu ROW1, (STATE_H + 0 * 8)(RSTATE);
+ vmovdqu ROW2, (STATE_H + 4 * 8)(RSTATE);
+
+ xor %eax, %eax;
+ vzeroall;
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_blake2b_transform_amd64_avx2,
+ .-_gcry_blake2b_transform_amd64_avx2;)
+
+#endif /*defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/blake2s-amd64-avx.S b/comm/third_party/libgcrypt/cipher/blake2s-amd64-avx.S
new file mode 100644
index 0000000000..5b93675871
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/blake2s-amd64-avx.S
@@ -0,0 +1,278 @@
+/* blake2s-amd64-avx.S - AVX implementation of BLAKE2s
+ *
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 BLAKE2 reference implementation
+ * by Samuel Neves, at https://github.com/BLAKE2/BLAKE2/tree/master/sse
+ * Copyright 2012, Samuel Neves <sneves@dei.uc.pt>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_AVX) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* register macros */
+#define RSTATE %rdi
+#define RINBLKS %rsi
+#define RNBLKS %rdx
+#define RIV %rcx
+
+/* state structure */
+#define STATE_H 0
+#define STATE_T (STATE_H + 8 * 4)
+#define STATE_F (STATE_T + 2 * 4)
+
+/* vector registers */
+#define ROW1 %xmm0
+#define ROW2 %xmm1
+#define ROW3 %xmm2
+#define ROW4 %xmm3
+#define TMP1 %xmm4
+#define TMP1x %xmm4
+#define R16 %xmm5
+#define R8 %xmm6
+
+#define MA1 %xmm8
+#define MA2 %xmm9
+#define MA3 %xmm10
+#define MA4 %xmm11
+
+#define MB1 %xmm12
+#define MB2 %xmm13
+#define MB3 %xmm14
+#define MB4 %xmm15
+
+/**********************************************************************
+ blake2s/AVX
+ **********************************************************************/
+
+#define GATHER_MSG(m1, m2, m3, m4, \
+ s0, s1, s2, s3, s4, s5, s6, s7, s8, \
+ s9, s10, s11, s12, s13, s14, s15) \
+ vmovd (s0)*4(RINBLKS), m1; \
+ vmovd (s1)*4(RINBLKS), m2; \
+ vmovd (s8)*4(RINBLKS), m3; \
+ vmovd (s9)*4(RINBLKS), m4; \
+ vpinsrd $1, (s2)*4(RINBLKS), m1, m1; \
+ vpinsrd $1, (s3)*4(RINBLKS), m2, m2; \
+ vpinsrd $1, (s10)*4(RINBLKS), m3, m3; \
+ vpinsrd $1, (s11)*4(RINBLKS), m4, m4; \
+ vpinsrd $2, (s4)*4(RINBLKS), m1, m1; \
+ vpinsrd $2, (s5)*4(RINBLKS), m2, m2; \
+ vpinsrd $2, (s12)*4(RINBLKS), m3, m3; \
+ vpinsrd $2, (s13)*4(RINBLKS), m4, m4; \
+ vpinsrd $3, (s6)*4(RINBLKS), m1, m1; \
+ vpinsrd $3, (s7)*4(RINBLKS), m2, m2; \
+ vpinsrd $3, (s14)*4(RINBLKS), m3, m3; \
+ vpinsrd $3, (s15)*4(RINBLKS), m4, m4;
+
+#define LOAD_MSG_0(m1, m2, m3, m4) \
+ GATHER_MSG(m1, m2, m3, m4, \
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
+#define LOAD_MSG_1(m1, m2, m3, m4) \
+ GATHER_MSG(m1, m2, m3, m4, \
+ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3)
+#define LOAD_MSG_2(m1, m2, m3, m4) \
+ GATHER_MSG(m1, m2, m3, m4, \
+ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4)
+#define LOAD_MSG_3(m1, m2, m3, m4) \
+ GATHER_MSG(m1, m2, m3, m4, \
+ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8)
+#define LOAD_MSG_4(m1, m2, m3, m4) \
+ GATHER_MSG(m1, m2, m3, m4, \
+ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13)
+#define LOAD_MSG_5(m1, m2, m3, m4) \
+ GATHER_MSG(m1, m2, m3, m4, \
+ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9)
+#define LOAD_MSG_6(m1, m2, m3, m4) \
+ GATHER_MSG(m1, m2, m3, m4, \
+ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11)
+#define LOAD_MSG_7(m1, m2, m3, m4) \
+ GATHER_MSG(m1, m2, m3, m4, \
+ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10)
+#define LOAD_MSG_8(m1, m2, m3, m4) \
+ GATHER_MSG(m1, m2, m3, m4, \
+ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5)
+#define LOAD_MSG_9(m1, m2, m3, m4) \
+ GATHER_MSG(m1, m2, m3, m4, \
+ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0)
+
+#define LOAD_MSG(r, m1, m2, m3, m4) LOAD_MSG_##r(m1, m2, m3, m4)
+
+#define ROR_16(in, out) vpshufb R16, in, out;
+
+#define ROR_8(in, out) vpshufb R8, in, out;
+
+#define ROR_12(in, out) \
+ vpsrld $12, in, TMP1; \
+ vpslld $(32 - 12), in, out; \
+ vpxor TMP1, out, out;
+
+#define ROR_7(in, out) \
+ vpsrld $7, in, TMP1; \
+ vpslld $(32 - 7), in, out; \
+ vpxor TMP1, out, out;
+
+#define G(r1, r2, r3, r4, m, ROR_A, ROR_B) \
+ vpaddd m, r1, r1; \
+ vpaddd r2, r1, r1; \
+ vpxor r1, r4, r4; \
+ ROR_A(r4, r4); \
+ vpaddd r4, r3, r3; \
+ vpxor r3, r2, r2; \
+ ROR_B(r2, r2);
+
+#define G1(r1, r2, r3, r4, m) \
+ G(r1, r2, r3, r4, m, ROR_16, ROR_12);
+
+#define G2(r1, r2, r3, r4, m) \
+ G(r1, r2, r3, r4, m, ROR_8, ROR_7);
+
+#define MM_SHUFFLE(z,y,x,w) \
+ (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
+
+#define DIAGONALIZE(r1, r2, r3, r4) \
+ vpshufd $MM_SHUFFLE(0,3,2,1), r2, r2; \
+ vpshufd $MM_SHUFFLE(1,0,3,2), r3, r3; \
+ vpshufd $MM_SHUFFLE(2,1,0,3), r4, r4;
+
+#define UNDIAGONALIZE(r1, r2, r3, r4) \
+ vpshufd $MM_SHUFFLE(2,1,0,3), r2, r2; \
+ vpshufd $MM_SHUFFLE(1,0,3,2), r3, r3; \
+ vpshufd $MM_SHUFFLE(0,3,2,1), r4, r4;
+
+#define ROUND(r, m1, m2, m3, m4) \
+ G1(ROW1, ROW2, ROW3, ROW4, m1); \
+ G2(ROW1, ROW2, ROW3, ROW4, m2); \
+ DIAGONALIZE(ROW1, ROW2, ROW3, ROW4); \
+ G1(ROW1, ROW2, ROW3, ROW4, m3); \
+ G2(ROW1, ROW2, ROW3, ROW4, m4); \
+ UNDIAGONALIZE(ROW1, ROW2, ROW3, ROW4);
+
+blake2s_data:
+.align 16
+.Liv:
+ .long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
+ .long 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
+.Lshuf_ror16:
+ .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
+.Lshuf_ror8:
+ .byte 1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12
+
+.align 64
+.globl _gcry_blake2s_transform_amd64_avx
+ELF(.type _gcry_blake2s_transform_amd64_avx,@function;)
+
+_gcry_blake2s_transform_amd64_avx:
+ /* input:
+ * %rdi: state
+ * %rsi: blks
+ * %rdx: num_blks
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ addq $64, (STATE_T + 0)(RSTATE);
+
+ vmovdqa .Lshuf_ror16 rRIP, R16;
+ vmovdqa .Lshuf_ror8 rRIP, R8;
+
+ vmovdqa .Liv+(0 * 4) rRIP, ROW3;
+ vmovdqa .Liv+(4 * 4) rRIP, ROW4;
+
+ vmovdqu (STATE_H + 0 * 4)(RSTATE), ROW1;
+ vmovdqu (STATE_H + 4 * 4)(RSTATE), ROW2;
+
+ vpxor (STATE_T)(RSTATE), ROW4, ROW4;
+
+ LOAD_MSG(0, MA1, MA2, MA3, MA4);
+ LOAD_MSG(1, MB1, MB2, MB3, MB4);
+
+.Loop:
+ ROUND(0, MA1, MA2, MA3, MA4);
+ LOAD_MSG(2, MA1, MA2, MA3, MA4);
+ ROUND(1, MB1, MB2, MB3, MB4);
+ LOAD_MSG(3, MB1, MB2, MB3, MB4);
+ ROUND(2, MA1, MA2, MA3, MA4);
+ LOAD_MSG(4, MA1, MA2, MA3, MA4);
+ ROUND(3, MB1, MB2, MB3, MB4);
+ LOAD_MSG(5, MB1, MB2, MB3, MB4);
+ ROUND(4, MA1, MA2, MA3, MA4);
+ LOAD_MSG(6, MA1, MA2, MA3, MA4);
+ ROUND(5, MB1, MB2, MB3, MB4);
+ LOAD_MSG(7, MB1, MB2, MB3, MB4);
+ ROUND(6, MA1, MA2, MA3, MA4);
+ LOAD_MSG(8, MA1, MA2, MA3, MA4);
+ ROUND(7, MB1, MB2, MB3, MB4);
+ LOAD_MSG(9, MB1, MB2, MB3, MB4);
+ sub $1, RNBLKS;
+ jz .Loop_end;
+
+ lea 64(RINBLKS), RINBLKS;
+ addq $64, (STATE_T + 0)(RSTATE);
+
+ ROUND(8, MA1, MA2, MA3, MA4);
+ LOAD_MSG(0, MA1, MA2, MA3, MA4);
+ ROUND(9, MB1, MB2, MB3, MB4);
+ LOAD_MSG(1, MB1, MB2, MB3, MB4);
+
+ vpxor ROW3, ROW1, ROW1;
+ vpxor ROW4, ROW2, ROW2;
+
+ vmovdqa .Liv+(0 * 4) rRIP, ROW3;
+ vmovdqa .Liv+(4 * 4) rRIP, ROW4;
+
+ vpxor (STATE_H + 0 * 4)(RSTATE), ROW1, ROW1;
+ vpxor (STATE_H + 4 * 4)(RSTATE), ROW2, ROW2;
+
+ vmovdqu ROW1, (STATE_H + 0 * 4)(RSTATE);
+ vmovdqu ROW2, (STATE_H + 4 * 4)(RSTATE);
+
+ vpxor (STATE_T)(RSTATE), ROW4, ROW4;
+
+ jmp .Loop;
+
+.Loop_end:
+ ROUND(8, MA1, MA2, MA3, MA4);
+ ROUND(9, MB1, MB2, MB3, MB4);
+
+ vpxor ROW3, ROW1, ROW1;
+ vpxor ROW4, ROW2, ROW2;
+ vpxor (STATE_H + 0 * 4)(RSTATE), ROW1, ROW1;
+ vpxor (STATE_H + 4 * 4)(RSTATE), ROW2, ROW2;
+
+ vmovdqu ROW1, (STATE_H + 0 * 4)(RSTATE);
+ vmovdqu ROW2, (STATE_H + 4 * 4)(RSTATE);
+
+ xor %eax, %eax;
+ vzeroall;
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_blake2s_transform_amd64_avx,
+ .-_gcry_blake2s_transform_amd64_avx;)
+
+#endif /*defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/blowfish-amd64.S b/comm/third_party/libgcrypt/cipher/blowfish-amd64.S
new file mode 100644
index 0000000000..bdb361d7eb
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/blowfish-amd64.S
@@ -0,0 +1,601 @@
+/* blowfish-amd64.S - AMD64 assembly implementation of Blowfish cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(USE_BLOWFISH) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* structure of BLOWFISH_context: */
+#define s0 0
+#define s1 ((s0) + 256 * 4)
+#define s2 ((s1) + 256 * 4)
+#define s3 ((s2) + 256 * 4)
+#define p ((s3) + 256 * 4)
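
These offsets assume BLOWFISH_context is laid out as four 256-entry u32 S-boxes followed by the 18-word P-array, matching the declaration in blowfish.c. A compile-time mirror of that assumption (the struct name here is invented for illustration):

  #include <assert.h>
  #include <stddef.h>
  #include <stdint.h>

  struct blowfish_ctx_sketch          /* hypothetical mirror of BLOWFISH_context */
  {
    uint32_t s0[256], s1[256], s2[256], s3[256];
    uint32_t p[18];
  };

  int main (void)
  {
    assert (offsetof (struct blowfish_ctx_sketch, s1) == 1 * 256 * 4);
    assert (offsetof (struct blowfish_ctx_sketch, s3) == 3 * 256 * 4);
    assert (offsetof (struct blowfish_ctx_sketch, p)  == 4 * 256 * 4);
    return 0;
  }
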
+
+/* register macros */
+#define CTX %rdi
+#define RIO %rsi
+
+#define RX0 %rax
+#define RX1 %rbx
+#define RX2 %rcx
+#define RX3 %rdx
+
+#define RX0d %eax
+#define RX1d %ebx
+#define RX2d %ecx
+#define RX3d %edx
+
+#define RX0bl %al
+#define RX1bl %bl
+#define RX2bl %cl
+#define RX3bl %dl
+
+#define RX0bh %ah
+#define RX1bh %bh
+#define RX2bh %ch
+#define RX3bh %dh
+
+#define RT0 %rbp
+#define RT1 %rsi
+#define RT2 %r8
+#define RT3 %r9
+
+#define RT0d %ebp
+#define RT1d %esi
+#define RT2d %r8d
+#define RT3d %r9d
+
+#define RKEY %r10
+
+/***********************************************************************
+ * 1-way blowfish
+ ***********************************************************************/
+#define F() \
+ movzbl RX0bh, RT1d; \
+ movzbl RX0bl, RT3d; \
+ rorq $16, RX0; \
+ movzbl RX0bh, RT0d; \
+ movzbl RX0bl, RT2d; \
+ rorq $16, RX0; \
+ movl s0(CTX,RT0,4), RT0d; \
+ addl s1(CTX,RT2,4), RT0d; \
+ xorl s2(CTX,RT1,4), RT0d; \
+ addl s3(CTX,RT3,4), RT0d; \
+ xorq RT0, RX0;
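
F() evaluates the Blowfish round function on the 32-bit half held in the low word of RX0; because the two rorq $16 add up to a half swap, the result is xored into the other half, so two consecutive F() invocations form one Feistel round. The lookup itself, as a scalar C sketch (the s[4][256] parameter mirrors the S-box offsets above):

  #include <stdint.h>

  /* F(x) = ((s0[a] + s1[b]) ^ s2[c]) + s3[d], with a..d the bytes of x from
   * most to least significant and 32-bit wrap-around addition. */
  uint32_t blowfish_F (const uint32_t s[4][256], uint32_t x)
  {
    uint32_t a = x >> 24, b = (x >> 16) & 0xff, c = (x >> 8) & 0xff, d = x & 0xff;
    return ((s[0][a] + s[1][b]) ^ s[2][c]) + s[3][d];
  }
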
+
+#define load_roundkey_enc(n) \
+ movq p+4*(n)(CTX), RX3;
+
+#define add_roundkey_enc() \
+ xorq RX3, RX0;
+
+#define round_enc(n) \
+ add_roundkey_enc(); \
+ load_roundkey_enc(n); \
+ \
+ F(); \
+ F();
+
+#define load_roundkey_dec(n) \
+ movq p+4*(n-1)(CTX), RX3; \
+ rorq $32, RX3;
+
+#define add_roundkey_dec() \
+ xorq RX3, RX0;
+
+#define round_dec(n) \
+ add_roundkey_dec(); \
+ load_roundkey_dec(n); \
+ \
+ F(); \
+ F();
+
+#define read_block() \
+ movq (RIO), RX0; \
+ rorq $32, RX0; \
+ bswapq RX0;
+
+#define write_block() \
+ bswapq RX0; \
+ movq RX0, (RIO);
+
+.align 8
+ELF(.type __blowfish_enc_blk1,@function;)
+
+__blowfish_enc_blk1:
+ /* input:
+ * %rdi: ctx, CTX
+ * RX0: input plaintext block
+ * output:
+ * RX0: output ciphertext block
+ */
+ CFI_STARTPROC();
+ movq %rbp, %r11;
+ CFI_REGISTER(%rbp, %r11);
+
+ load_roundkey_enc(0);
+ round_enc(2);
+ round_enc(4);
+ round_enc(6);
+ round_enc(8);
+ round_enc(10);
+ round_enc(12);
+ round_enc(14);
+ round_enc(16);
+ add_roundkey_enc();
+
+ movq %r11, %rbp;
+ CFI_RESTORE(%rbp)
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;)
+
+.align 8
+.globl _gcry_blowfish_amd64_do_encrypt
+ELF(.type _gcry_blowfish_amd64_do_encrypt,@function;)
+
+_gcry_blowfish_amd64_do_encrypt:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: u32 *ret_xl
+ * %rdx: u32 *ret_xr
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ movl (%rdx), RX0d;
+ shlq $32, RX0;
+ movl (%rsi), RT3d;
+ movq %rdx, %r10;
+ orq RT3, RX0;
+ movq %rsi, RX2;
+
+ call __blowfish_enc_blk1;
+
+ movl RX0d, (%r10);
+ shrq $32, RX0;
+ movl RX0d, (RX2);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;)
+
+.align 8
+.globl _gcry_blowfish_amd64_encrypt_block
+ELF(.type _gcry_blowfish_amd64_encrypt_block,@function;)
+
+_gcry_blowfish_amd64_encrypt_block:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ movq %rsi, %r10;
+
+ movq %rdx, RIO;
+ read_block();
+
+ call __blowfish_enc_blk1;
+
+ movq %r10, RIO;
+ write_block();
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;)
+
+.align 8
+.globl _gcry_blowfish_amd64_decrypt_block
+ELF(.type _gcry_blowfish_amd64_decrypt_block,@function;)
+
+_gcry_blowfish_amd64_decrypt_block:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ movq %rbp, %r11;
+ CFI_REGISTER(%rbp, %r11);
+
+ movq %rsi, %r10;
+ movq %rdx, RIO;
+
+ read_block();
+
+ load_roundkey_dec(17);
+ round_dec(15);
+ round_dec(13);
+ round_dec(11);
+ round_dec(9);
+ round_dec(7);
+ round_dec(5);
+ round_dec(3);
+ round_dec(1);
+ add_roundkey_dec();
+
+ movq %r10, RIO;
+ write_block();
+
+ movq %r11, %rbp;
+ CFI_RESTORE(%rbp);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;)
+
+/**********************************************************************
+ 4-way blowfish, four blocks parallel
+ **********************************************************************/
+#define F4(x) \
+ movzbl x ## bh, RT1d; \
+ movzbl x ## bl, RT3d; \
+ rorq $16, x; \
+ movzbl x ## bh, RT0d; \
+ movzbl x ## bl, RT2d; \
+ rorq $16, x; \
+ movl s0(CTX,RT0,4), RT0d; \
+ addl s1(CTX,RT2,4), RT0d; \
+ xorl s2(CTX,RT1,4), RT0d; \
+ addl s3(CTX,RT3,4), RT0d; \
+ xorq RT0, x;
+
+#define add_preloaded_roundkey4() \
+ xorq RKEY, RX0; \
+ xorq RKEY, RX1; \
+ xorq RKEY, RX2; \
+ xorq RKEY, RX3;
+
+#define preload_roundkey_enc(n) \
+ movq p+4*(n)(CTX), RKEY;
+
+#define add_roundkey_enc4(n) \
+ add_preloaded_roundkey4(); \
+ preload_roundkey_enc(n + 2);
+
+#define round_enc4(n) \
+ add_roundkey_enc4(n); \
+ \
+ F4(RX0); \
+ F4(RX1); \
+ F4(RX2); \
+ F4(RX3); \
+ \
+ F4(RX0); \
+ F4(RX1); \
+ F4(RX2); \
+ F4(RX3);
+
+#define preload_roundkey_dec(n) \
+ movq p+4*((n)-1)(CTX), RKEY; \
+ rorq $32, RKEY;
+
+#define add_roundkey_dec4(n) \
+ add_preloaded_roundkey4(); \
+ preload_roundkey_dec(n - 2);
+
+#define round_dec4(n) \
+ add_roundkey_dec4(n); \
+ \
+ F4(RX0); \
+ F4(RX1); \
+ F4(RX2); \
+ F4(RX3); \
+ \
+ F4(RX0); \
+ F4(RX1); \
+ F4(RX2); \
+ F4(RX3);
+
+#define inbswap_block4() \
+ rorq $32, RX0; \
+ bswapq RX0; \
+ rorq $32, RX1; \
+ bswapq RX1; \
+ rorq $32, RX2; \
+ bswapq RX2; \
+ rorq $32, RX3; \
+ bswapq RX3;
+
+#define inctrswap_block4() \
+ rorq $32, RX0; \
+ rorq $32, RX1; \
+ rorq $32, RX2; \
+ rorq $32, RX3;
+
+#define outbswap_block4() \
+ bswapq RX0; \
+ bswapq RX1; \
+ bswapq RX2; \
+ bswapq RX3;
+
+.align 8
+ELF(.type __blowfish_enc_blk4,@function;)
+
+__blowfish_enc_blk4:
+ /* input:
+ * %rdi: ctx, CTX
+ * RX0,RX1,RX2,RX3: four input inbswapped plaintext blocks
+ * output:
+ * RX0,RX1,RX2,RX3: four output ciphertext blocks
+ */
+ CFI_STARTPROC();
+ preload_roundkey_enc(0);
+
+ round_enc4(0);
+ round_enc4(2);
+ round_enc4(4);
+ round_enc4(6);
+ round_enc4(8);
+ round_enc4(10);
+ round_enc4(12);
+ round_enc4(14);
+ add_preloaded_roundkey4();
+
+ outbswap_block4();
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;)
+
+.align 8
+ELF(.type __blowfish_dec_blk4,@function;)
+
+__blowfish_dec_blk4:
+ /* input:
+ * %rdi: ctx, CTX
+ * RX0,RX1,RX2,RX3: four input ciphertext blocks
+ * output:
+ * RX0,RX1,RX2,RX3: four output plaintext blocks
+ */
+ CFI_STARTPROC();
+ preload_roundkey_dec(17);
+
+ inbswap_block4();
+
+ round_dec4(17);
+ round_dec4(15);
+ round_dec4(13);
+ round_dec4(11);
+ round_dec4(9);
+ round_dec4(7);
+ round_dec4(5);
+ round_dec4(3);
+ add_preloaded_roundkey4();
+
+ outbswap_block4();
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;)
+
+.align 8
+.globl _gcry_blowfish_amd64_ctr_enc
+ELF(.type _gcry_blowfish_amd64_ctr_enc,@function;)
+_gcry_blowfish_amd64_ctr_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (4 blocks)
+ * %rdx: src (4 blocks)
+ * %rcx: iv (big endian, 64bit)
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %r12;
+ CFI_PUSH(%r12);
+ pushq %r13;
+ CFI_PUSH(%r13);
+
+ /* %r11-%r13 are not used by __blowfish_enc_blk4 */
+ movq %rcx, %r13; /*iv*/
+ movq %rdx, %r12; /*src*/
+ movq %rsi, %r11; /*dst*/
+
+ /* load IV and byteswap */
+ movq (%r13), RT0;
+ bswapq RT0;
+ movq RT0, RX0;
+
+ /* construct IVs */
+ leaq 1(RT0), RX1;
+ leaq 2(RT0), RX2;
+ leaq 3(RT0), RX3;
+ leaq 4(RT0), RT0;
+ bswapq RT0;
+
+ inctrswap_block4();
+
+ /* store new IV */
+ movq RT0, (%r13);
+
+ call __blowfish_enc_blk4;
+
+ /* XOR key-stream with plaintext */
+ xorq 0 * 8(%r12), RX0;
+ xorq 1 * 8(%r12), RX1;
+ xorq 2 * 8(%r12), RX2;
+ xorq 3 * 8(%r12), RX3;
+ movq RX0, 0 * 8(%r11);
+ movq RX1, 1 * 8(%r11);
+ movq RX2, 2 * 8(%r11);
+ movq RX3, 3 * 8(%r11);
+
+ popq %r13;
+ CFI_POP(%r13);
+ popq %r12;
+ CFI_POP(%r12);
+ popq %rbx;
+ CFI_POP(%rbx);
+ popq %rbp;
+ CFI_POP(%rbp);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;)
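
The routine derives four consecutive big-endian counter values from the IV, encrypts them with __blowfish_enc_blk4 to obtain the keystream, xors that into the source blocks and stores the counter advanced by four back into the IV. The same flow as a plain C sketch; the enc_blk_fn callback and the ctr_enc_4blocks name are illustrative only:

  #include <stdint.h>

  /* Hypothetical single-block encrypt callback standing in for the cipher core. */
  typedef void (*enc_blk_fn) (void *ctx, uint8_t out[8], const uint8_t in[8]);

  void ctr_enc_4blocks (void *ctx, enc_blk_fn encrypt_block,
                        uint8_t dst[32], const uint8_t src[32], uint8_t iv[8])
  {
    uint64_t ctr = 0;

    for (int i = 0; i < 8; i++)                 /* IV is a big-endian counter */
      ctr = (ctr << 8) | iv[i];

    for (int blk = 0; blk < 4; blk++)
      {
        uint8_t cb[8], ks[8];
        uint64_t c = ctr + (uint64_t)blk;

        for (int i = 0; i < 8; i++)             /* counter block, big endian */
          cb[i] = (uint8_t)(c >> (56 - 8 * i));
        encrypt_block (ctx, ks, cb);            /* keystream = E_K(counter) */
        for (int i = 0; i < 8; i++)
          dst[blk * 8 + i] = src[blk * 8 + i] ^ ks[i];
      }

    ctr += 4;                                   /* write the advanced counter back */
    for (int i = 0; i < 8; i++)
      iv[i] = (uint8_t)(ctr >> (56 - 8 * i));
  }
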
+
+.align 8
+.globl _gcry_blowfish_amd64_cbc_dec
+ELF(.type _gcry_blowfish_amd64_cbc_dec,@function;)
+_gcry_blowfish_amd64_cbc_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (4 blocks)
+ * %rdx: src (4 blocks)
+ * %rcx: iv (64bit)
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %r12;
+ CFI_PUSH(%r12);
+ pushq %r13;
+ CFI_PUSH(%r13);
+
+ /* %r11-%r13 are not used by __blowfish_dec_blk4 */
+ movq %rsi, %r11; /*dst*/
+ movq %rdx, %r12; /*src*/
+ movq %rcx, %r13; /*iv*/
+
+ /* load input */
+ movq 0 * 8(%r12), RX0;
+ movq 1 * 8(%r12), RX1;
+ movq 2 * 8(%r12), RX2;
+ movq 3 * 8(%r12), RX3;
+
+ call __blowfish_dec_blk4;
+
+ movq 3 * 8(%r12), RT0;
+ xorq (%r13), RX0;
+ xorq 0 * 8(%r12), RX1;
+ xorq 1 * 8(%r12), RX2;
+ xorq 2 * 8(%r12), RX3;
+ movq RT0, (%r13); /* store new IV */
+
+ movq RX0, 0 * 8(%r11);
+ movq RX1, 1 * 8(%r11);
+ movq RX2, 2 * 8(%r11);
+ movq RX3, 3 * 8(%r11);
+
+ popq %r13;
+ CFI_POP(%r13);
+ popq %r12;
+ CFI_POP(%r12);
+ popq %rbx;
+ CFI_POP(%rbx);
+ popq %rbp;
+ CFI_POP(%rbp);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;)
+
+.align 8
+.globl _gcry_blowfish_amd64_cfb_dec
+ELF(.type _gcry_blowfish_amd64_cfb_dec,@function;)
+_gcry_blowfish_amd64_cfb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (4 blocks)
+ * %rdx: src (4 blocks)
+ * %rcx: iv (64bit)
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %r12;
+ CFI_PUSH(%r12);
+ pushq %r13;
+ CFI_PUSH(%r13);
+
+ /* %r11-%r13 are not used by __blowfish_enc_blk4 */
+ movq %rcx, %r13; /*iv*/
+ movq %rdx, %r12; /*src*/
+ movq %rsi, %r11; /*dst*/
+
+ /* Load input */
+ movq (%r13), RX0;
+ movq 0 * 8(%r12), RX1;
+ movq 1 * 8(%r12), RX2;
+ movq 2 * 8(%r12), RX3;
+
+ inbswap_block4();
+
+ /* Update IV */
+ movq 3 * 8(%r12), RT0;
+ movq RT0, (%r13);
+
+ call __blowfish_enc_blk4;
+
+ xorq 0 * 8(%r12), RX0;
+ xorq 1 * 8(%r12), RX1;
+ xorq 2 * 8(%r12), RX2;
+ xorq 3 * 8(%r12), RX3;
+ movq RX0, 0 * 8(%r11);
+ movq RX1, 1 * 8(%r11);
+ movq RX2, 2 * 8(%r11);
+ movq RX3, 3 * 8(%r11);
+
+ popq %r13;
+ CFI_POP(%r13);
+ popq %r12;
+ CFI_POP(%r12);
+ popq %rbx;
+ CFI_POP(%rbx);
+ popq %rbp;
+ CFI_POP(%rbp);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;)
+
+#endif /*defined(USE_BLOWFISH)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/blowfish-arm.S b/comm/third_party/libgcrypt/cipher/blowfish-arm.S
new file mode 100644
index 0000000000..b30aa31f1d
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/blowfish-arm.S
@@ -0,0 +1,743 @@
+/* blowfish-arm.S - ARM assembly implementation of Blowfish cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+/* structure of crypto context */
+#define s0 0
+#define s1 (s0 + (1 * 256) * 4)
+#define s2 (s0 + (2 * 256) * 4)
+#define s3 (s0 + (3 * 256) * 4)
+#define p (s3 + (1 * 256) * 4)
+
+/* register macros */
+#define CTXs0 %r0
+#define CTXs1 %r9
+#define CTXs2 %r8
+#define CTXs3 %r10
+#define RMASK %lr
+#define RKEYL %r2
+#define RKEYR %ip
+
+#define RL0 %r3
+#define RR0 %r4
+
+#define RL1 %r9
+#define RR1 %r10
+
+#define RT0 %r11
+#define RT1 %r7
+#define RT2 %r5
+#define RT3 %r6
+
+/* helper macros */
+#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \
+ ldrb rout, [rsrc, #((offs) + 0)]; \
+ ldrb rtmp, [rsrc, #((offs) + 1)]; \
+ orr rout, rout, rtmp, lsl #8; \
+ ldrb rtmp, [rsrc, #((offs) + 2)]; \
+ orr rout, rout, rtmp, lsl #16; \
+ ldrb rtmp, [rsrc, #((offs) + 3)]; \
+ orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \
+ mov rtmp0, rin, lsr #8; \
+ strb rin, [rdst, #((offs) + 0)]; \
+ mov rtmp1, rin, lsr #16; \
+ strb rtmp0, [rdst, #((offs) + 1)]; \
+ mov rtmp0, rin, lsr #24; \
+ strb rtmp1, [rdst, #((offs) + 2)]; \
+ strb rtmp0, [rdst, #((offs) + 3)];
+
+#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \
+ ldrb rout, [rsrc, #((offs) + 3)]; \
+ ldrb rtmp, [rsrc, #((offs) + 2)]; \
+ orr rout, rout, rtmp, lsl #8; \
+ ldrb rtmp, [rsrc, #((offs) + 1)]; \
+ orr rout, rout, rtmp, lsl #16; \
+ ldrb rtmp, [rsrc, #((offs) + 0)]; \
+ orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \
+ mov rtmp0, rin, lsr #8; \
+ strb rin, [rdst, #((offs) + 3)]; \
+ mov rtmp1, rin, lsr #16; \
+ strb rtmp0, [rdst, #((offs) + 2)]; \
+ mov rtmp0, rin, lsr #24; \
+ strb rtmp1, [rdst, #((offs) + 1)]; \
+ strb rtmp0, [rdst, #((offs) + 0)];
+
+#ifdef __ARMEL__
+ #define ldr_unaligned_host ldr_unaligned_le
+ #define str_unaligned_host str_unaligned_le
+
+ /* bswap on little-endian */
+#ifdef HAVE_ARM_ARCH_V6
+ #define host_to_be(reg, rtmp) \
+ rev reg, reg;
+ #define be_to_host(reg, rtmp) \
+ rev reg, reg;
+#else
+ #define host_to_be(reg, rtmp) \
+ eor rtmp, reg, reg, ror #16; \
+ mov rtmp, rtmp, lsr #8; \
+ bic rtmp, rtmp, #65280; \
+ eor reg, rtmp, reg, ror #8;
+ #define be_to_host(reg, rtmp) \
+ eor rtmp, reg, reg, ror #16; \
+ mov rtmp, rtmp, lsr #8; \
+ bic rtmp, rtmp, #65280; \
+ eor reg, rtmp, reg, ror #8;
+#endif
+#else
+ #define ldr_unaligned_host ldr_unaligned_be
+ #define str_unaligned_host str_unaligned_be
+
+ /* nop on big-endian */
+ #define host_to_be(reg, rtmp) /*_*/
+ #define be_to_host(reg, rtmp) /*_*/
+#endif
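
Without the ARMv6 rev instruction, host_to_be/be_to_host fall back to the classic eor/lsr/bic/eor byte swap: x ^ ror(x,16), shifted right by 8 with byte 1 masked off, then xored with ror(x,8), lands every byte of a 32-bit word in its mirrored position. A quick scalar check (illustration only):

  #include <assert.h>
  #include <stdint.h>

  static uint32_t ror32 (uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }

  static uint32_t swap_like_above (uint32_t x)   /* mirrors the eor/lsr/bic/eor sequence */
  {
    uint32_t t = x ^ ror32 (x, 16);
    t >>= 8;
    t &= ~(uint32_t)0xff00;                      /* the bic #65280 */
    return t ^ ror32 (x, 8);
  }

  int main (void)
  {
    uint32_t xs[] = { 0x01020304u, 0, 0xffffffffu, 0xdeadbeefu };

    for (unsigned i = 0; i < sizeof xs / sizeof *xs; i++)
      {
        uint32_t x = xs[i];
        uint32_t want = (x >> 24) | ((x >> 8) & 0xff00u)
                        | ((x << 8) & 0xff0000u) | (x << 24);
        assert (swap_like_above (x) == want);
      }
    return 0;
  }
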
+
+#define host_to_host(x, y) /*_*/
+
+/***********************************************************************
+ * 1-way blowfish
+ ***********************************************************************/
+#define F(l, r) \
+ and RT0, RMASK, l, lsr#(24 - 2); \
+ and RT1, RMASK, l, lsr#(16 - 2); \
+ ldr RT0, [CTXs0, RT0]; \
+ and RT2, RMASK, l, lsr#(8 - 2); \
+ ldr RT1, [CTXs1, RT1]; \
+ and RT3, RMASK, l, lsl#2; \
+ ldr RT2, [CTXs2, RT2]; \
+ add RT0, RT1; \
+ ldr RT3, [CTXs3, RT3]; \
+ eor RT0, RT2; \
+ add RT0, RT3; \
+ eor r, RT0;
+
+#define load_roundkey_enc(n) \
+ ldr RKEYL, [CTXs2, #((p - s2) + (4 * (n) + 0))]; \
+ ldr RKEYR, [CTXs2, #((p - s2) + (4 * (n) + 4))];
+
+#define add_roundkey_enc() \
+ eor RL0, RKEYL; \
+ eor RR0, RKEYR;
+
+#define round_enc(n) \
+ add_roundkey_enc(); \
+ load_roundkey_enc(n); \
+ \
+ F(RL0, RR0); \
+ F(RR0, RL0);
+
+#define load_roundkey_dec(n) \
+ ldr RKEYL, [CTXs2, #((p - s2) + (4 * ((n) - 1) + 4))]; \
+ ldr RKEYR, [CTXs2, #((p - s2) + (4 * ((n) - 1) + 0))];
+
+#define add_roundkey_dec() \
+ eor RL0, RKEYL; \
+ eor RR0, RKEYR;
+
+#define round_dec(n) \
+ add_roundkey_dec(); \
+ load_roundkey_dec(n); \
+ \
+ F(RL0, RR0); \
+ F(RR0, RL0);
+
+#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \
+ ldr l0, [rin, #((offs) + 0)]; \
+ ldr r0, [rin, #((offs) + 4)]; \
+ convert(l0, rtmp); \
+ convert(r0, rtmp);
+
+#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \
+ convert(l0, rtmp); \
+ convert(r0, rtmp); \
+ str l0, [rout, #((offs) + 0)]; \
+ str r0, [rout, #((offs) + 4)];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+ /* unaligned word reads allowed */
+ #define read_block(rin, offs, l0, r0, rtmp0) \
+ read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0)
+
+ #define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \
+ write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0)
+
+ #define read_block_host(rin, offs, l0, r0, rtmp0) \
+ read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0)
+
+ #define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \
+ write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0)
+#else
+ /* need to handle unaligned reads by byte reads */
+ #define read_block(rin, offs, l0, r0, rtmp0) \
+ tst rin, #3; \
+ beq 1f; \
+ ldr_unaligned_be(l0, rin, (offs) + 0, rtmp0); \
+ ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \
+ b 2f; \
+ 1:;\
+ read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0); \
+ 2:;
+
+ #define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \
+ tst rout, #3; \
+ beq 1f; \
+ str_unaligned_be(l0, rout, (offs) + 0, rtmp0, rtmp1); \
+ str_unaligned_be(r0, rout, (offs) + 4, rtmp0, rtmp1); \
+ b 2f; \
+ 1:;\
+ write_block_aligned(rout, offs, l0, r0, be_to_host, rtmp0); \
+ 2:;
+
+ #define read_block_host(rin, offs, l0, r0, rtmp0) \
+ tst rin, #3; \
+ beq 1f; \
+ ldr_unaligned_host(l0, rin, (offs) + 0, rtmp0); \
+ ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \
+ b 2f; \
+ 1:;\
+ read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0); \
+ 2:;
+
+ #define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \
+ tst rout, #3; \
+ beq 1f; \
+ str_unaligned_host(l0, rout, (offs) + 0, rtmp0, rtmp1); \
+ str_unaligned_host(r0, rout, (offs) + 4, rtmp0, rtmp1); \
+ b 2f; \
+ 1:;\
+ write_block_aligned(rout, offs, l0, r0, host_to_host, rtmp0); \
+ 2:;
+#endif
+
+.align 3
+.type __blowfish_enc_blk1,%function;
+
+__blowfish_enc_blk1:
+ /* input:
+ * preloaded: CTX
+ * [RL0, RR0]: src
+ * output:
+ * [RR0, RL0]: dst
+ */
+ push {%lr};
+
+ add CTXs1, CTXs0, #(s1 - s0);
+ add CTXs2, CTXs0, #(s2 - s0);
+ mov RMASK, #(0xff << 2); /* byte mask */
+ add CTXs3, CTXs1, #(s3 - s1);
+
+ load_roundkey_enc(0);
+ round_enc(2);
+ round_enc(4);
+ round_enc(6);
+ round_enc(8);
+ round_enc(10);
+ round_enc(12);
+ round_enc(14);
+ round_enc(16);
+ add_roundkey_enc();
+
+ pop {%pc};
+.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;
+
+.align 8
+.globl _gcry_blowfish_arm_do_encrypt
+.type _gcry_blowfish_arm_do_encrypt,%function;
+
+_gcry_blowfish_arm_do_encrypt:
+ /* input:
+ * %r0: ctx, CTX
+ * %r1: u32 *ret_xl
+ * %r2: u32 *ret_xr
+ */
+ push {%r2, %r4-%r11, %ip, %lr};
+
+ ldr RL0, [%r1];
+ ldr RR0, [%r2];
+
+ bl __blowfish_enc_blk1;
+
+ pop {%r2};
+ str RR0, [%r1];
+ str RL0, [%r2];
+
+ pop {%r4-%r11, %ip, %pc};
+.size _gcry_blowfish_arm_do_encrypt,.-_gcry_blowfish_arm_do_encrypt;
+
+.align 3
+.globl _gcry_blowfish_arm_encrypt_block
+.type _gcry_blowfish_arm_encrypt_block,%function;
+
+_gcry_blowfish_arm_encrypt_block:
+ /* input:
+ * %r0: ctx, CTX
+ * %r1: dst
+ * %r2: src
+ */
+ push {%r4-%r11, %ip, %lr};
+
+ read_block(%r2, 0, RL0, RR0, RT0);
+
+ bl __blowfish_enc_blk1;
+
+ write_block(%r1, 0, RR0, RL0, RT0, RT1);
+
+ pop {%r4-%r11, %ip, %pc};
+.size _gcry_blowfish_arm_encrypt_block,.-_gcry_blowfish_arm_encrypt_block;
+
+.align 3
+.globl _gcry_blowfish_arm_decrypt_block
+.type _gcry_blowfish_arm_decrypt_block,%function;
+
+_gcry_blowfish_arm_decrypt_block:
+ /* input:
+ * %r0: ctx, CTX
+ * %r1: dst
+ * %r2: src
+ */
+ push {%r4-%r11, %ip, %lr};
+
+ add CTXs1, CTXs0, #(s1 - s0);
+ add CTXs2, CTXs0, #(s2 - s0);
+ mov RMASK, #(0xff << 2); /* byte mask */
+ add CTXs3, CTXs1, #(s3 - s1);
+
+ read_block(%r2, 0, RL0, RR0, RT0);
+
+ load_roundkey_dec(17);
+ round_dec(15);
+ round_dec(13);
+ round_dec(11);
+ round_dec(9);
+ round_dec(7);
+ round_dec(5);
+ round_dec(3);
+ round_dec(1);
+ add_roundkey_dec();
+
+ write_block(%r1, 0, RR0, RL0, RT0, RT1);
+
+ pop {%r4-%r11, %ip, %pc};
+.size _gcry_blowfish_arm_decrypt_block,.-_gcry_blowfish_arm_decrypt_block;
+
+/***********************************************************************
+ * 2-way blowfish
+ ***********************************************************************/
+#define F2(n, l0, r0, l1, r1, set_nextk, dec) \
+ \
+ and RT0, RMASK, l0, lsr#(24 - 2); \
+ and RT1, RMASK, l0, lsr#(16 - 2); \
+ and RT2, RMASK, l0, lsr#(8 - 2); \
+ add RT1, #(s1 - s0); \
+ \
+ ldr RT0, [CTXs0, RT0]; \
+ and RT3, RMASK, l0, lsl#2; \
+ ldr RT1, [CTXs0, RT1]; \
+ add RT3, #(s3 - s2); \
+ ldr RT2, [CTXs2, RT2]; \
+ add RT0, RT1; \
+ ldr RT3, [CTXs2, RT3]; \
+ \
+ and RT1, RMASK, l1, lsr#(24 - 2); \
+ eor RT0, RT2; \
+ and RT2, RMASK, l1, lsr#(16 - 2); \
+ add RT0, RT3; \
+ add RT2, #(s1 - s0); \
+ and RT3, RMASK, l1, lsr#(8 - 2); \
+ eor r0, RT0; \
+ \
+ ldr RT1, [CTXs0, RT1]; \
+ and RT0, RMASK, l1, lsl#2; \
+ ldr RT2, [CTXs0, RT2]; \
+ add RT0, #(s3 - s2); \
+ ldr RT3, [CTXs2, RT3]; \
+ add RT1, RT2; \
+ ldr RT0, [CTXs2, RT0]; \
+ \
+ and RT2, RMASK, r0, lsr#(24 - 2); \
+ eor RT1, RT3; \
+ and RT3, RMASK, r0, lsr#(16 - 2); \
+ add RT1, RT0; \
+ add RT3, #(s1 - s0); \
+ and RT0, RMASK, r0, lsr#(8 - 2); \
+ eor r1, RT1; \
+ \
+ ldr RT2, [CTXs0, RT2]; \
+ and RT1, RMASK, r0, lsl#2; \
+ ldr RT3, [CTXs0, RT3]; \
+ add RT1, #(s3 - s2); \
+ ldr RT0, [CTXs2, RT0]; \
+ add RT2, RT3; \
+ ldr RT1, [CTXs2, RT1]; \
+ \
+ and RT3, RMASK, r1, lsr#(24 - 2); \
+ eor RT2, RT0; \
+ and RT0, RMASK, r1, lsr#(16 - 2); \
+ add RT2, RT1; \
+ add RT0, #(s1 - s0); \
+ and RT1, RMASK, r1, lsr#(8 - 2); \
+ eor l0, RT2; \
+ \
+ ldr RT3, [CTXs0, RT3]; \
+ and RT2, RMASK, r1, lsl#2; \
+ ldr RT0, [CTXs0, RT0]; \
+ add RT2, #(s3 - s2); \
+ ldr RT1, [CTXs2, RT1]; \
+ eor l1, RKEYL; \
+ ldr RT2, [CTXs2, RT2]; \
+ \
+ eor r0, RKEYR; \
+ add RT3, RT0; \
+ eor r1, RKEYR; \
+ eor RT3, RT1; \
+ eor l0, RKEYL; \
+ add RT3, RT2; \
+ set_nextk(RKEYL, (p - s2) + (4 * (n) + ((dec) * 4))); \
+ eor l1, RT3; \
+ set_nextk(RKEYR, (p - s2) + (4 * (n) + (!(dec) * 4)));
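+
+/* F2 runs one Feistel round on two independent blocks at once: the S-box
+ * loads and the add/xor combining steps of the two blocks are interleaved
+ * so their memory accesses and ALU operations can overlap.  set_nextk
+ * fetches the round keys for the following round at the end, and the
+ * 'dec' argument swaps the two key words for the decryption direction. */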
+
+#define load_n_add_roundkey_enc2(n) \
+ load_roundkey_enc(n); \
+ eor RL0, RKEYL; \
+ eor RR0, RKEYR; \
+ eor RL1, RKEYL; \
+ eor RR1, RKEYR; \
+ load_roundkey_enc((n) + 2);
+
+#define next_key(reg, offs) \
+ ldr reg, [CTXs2, #(offs)];
+
+#define dummy(x, y) /* do nothing */
+
+#define round_enc2(n, load_next_key) \
+ F2((n) + 2, RL0, RR0, RL1, RR1, load_next_key, 0);
+
+#define load_n_add_roundkey_dec2(n) \
+ load_roundkey_dec(n); \
+ eor RL0, RKEYL; \
+ eor RR0, RKEYR; \
+ eor RL1, RKEYL; \
+ eor RR1, RKEYR; \
+ load_roundkey_dec((n) - 2);
+
+#define round_dec2(n, load_next_key) \
+ F2((n) - 3, RL0, RR0, RL1, RR1, load_next_key, 1);
+
+#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \
+ ldr l0, [rin, #(0)]; \
+ ldr r0, [rin, #(4)]; \
+ convert(l0, rtmp); \
+ ldr l1, [rin, #(8)]; \
+ convert(r0, rtmp); \
+ ldr r1, [rin, #(12)]; \
+ convert(l1, rtmp); \
+ convert(r1, rtmp);
+
+#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \
+ convert(l0, rtmp); \
+ convert(r0, rtmp); \
+ convert(l1, rtmp); \
+ str l0, [rout, #(0)]; \
+ convert(r1, rtmp); \
+ str r0, [rout, #(4)]; \
+ str l1, [rout, #(8)]; \
+ str r1, [rout, #(12)];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+ /* unaligned word reads allowed */
+ #define read_block2(rin, l0, r0, l1, r1, rtmp0) \
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0)
+
+ #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+ write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0)
+
+ #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0)
+
+ #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+ write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0)
+#else
+ /* need to handle unaligned reads by byte reads */
+ #define read_block2(rin, l0, r0, l1, r1, rtmp0) \
+ tst rin, #3; \
+ beq 1f; \
+ ldr_unaligned_be(l0, rin, 0, rtmp0); \
+ ldr_unaligned_be(r0, rin, 4, rtmp0); \
+ ldr_unaligned_be(l1, rin, 8, rtmp0); \
+ ldr_unaligned_be(r1, rin, 12, rtmp0); \
+ b 2f; \
+ 1:;\
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0); \
+ 2:;
+
+ #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+ tst rout, #3; \
+ beq 1f; \
+ str_unaligned_be(l0, rout, 0, rtmp0, rtmp1); \
+ str_unaligned_be(r0, rout, 4, rtmp0, rtmp1); \
+ str_unaligned_be(l1, rout, 8, rtmp0, rtmp1); \
+ str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \
+ b 2f; \
+ 1:;\
+ write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0); \
+ 2:;
+
+ #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
+ tst rin, #3; \
+ beq 1f; \
+ ldr_unaligned_host(l0, rin, 0, rtmp0); \
+ ldr_unaligned_host(r0, rin, 4, rtmp0); \
+ ldr_unaligned_host(l1, rin, 8, rtmp0); \
+ ldr_unaligned_host(r1, rin, 12, rtmp0); \
+ b 2f; \
+ 1:;\
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0); \
+ 2:;
+
+ #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+ tst rout, #3; \
+ beq 1f; \
+ str_unaligned_host(l0, rout, 0, rtmp0, rtmp1); \
+ str_unaligned_host(r0, rout, 4, rtmp0, rtmp1); \
+ str_unaligned_host(l1, rout, 8, rtmp0, rtmp1); \
+ str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \
+ b 2f; \
+ 1:;\
+ write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0); \
+ 2:;
+#endif
+
+.align 3
+.type _gcry_blowfish_arm_enc_blk2,%function;
+
+_gcry_blowfish_arm_enc_blk2:
+ /* input:
+ * preloaded: CTX
+ * [RL0, RR0], [RL1, RR1]: src
+ * output:
+ * [RR0, RL0], [RR1, RL1]: dst
+ */
+ push {RT0,%lr};
+
+ add CTXs2, CTXs0, #(s2 - s0);
+ mov RMASK, #(0xff << 2); /* byte mask */
+
+ load_n_add_roundkey_enc2(0);
+ round_enc2(2, next_key);
+ round_enc2(4, next_key);
+ round_enc2(6, next_key);
+ round_enc2(8, next_key);
+ round_enc2(10, next_key);
+ round_enc2(12, next_key);
+ round_enc2(14, next_key);
+ round_enc2(16, dummy);
+
+ host_to_be(RR0, RT0);
+ host_to_be(RL0, RT0);
+ host_to_be(RR1, RT0);
+ host_to_be(RL1, RT0);
+
+ pop {RT0,%pc};
+.size _gcry_blowfish_arm_enc_blk2,.-_gcry_blowfish_arm_enc_blk2;
+
+.align 3
+.globl _gcry_blowfish_arm_cfb_dec;
+.type _gcry_blowfish_arm_cfb_dec,%function;
+
+_gcry_blowfish_arm_cfb_dec:
+ /* input:
+ * %r0: CTX
+ * %r1: dst (2 blocks)
+ * %r2: src (2 blocks)
+ * %r3: iv (64bit)
+ */
+ push {%r2, %r4-%r11, %ip, %lr};
+
+ mov %lr, %r3;
+
+ /* Load input (iv/%r3 is aligned, src/%r2 might not be) */
+ ldm %r3, {RL0, RR0};
+ host_to_be(RL0, RT0);
+ host_to_be(RR0, RT0);
+ read_block(%r2, 0, RL1, RR1, RT0);
+
+ /* Update IV, load src[1] and save to iv[0] */
+ read_block_host(%r2, 8, %r5, %r6, RT0);
+ stm %lr, {%r5, %r6};
+
+ bl _gcry_blowfish_arm_enc_blk2;
+ /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+	/* %r1: dst, %r0: src */
+ pop {%r0};
+
+ /* dst = src ^ result */
+ read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr);
+ eor %r5, %r4;
+ eor %r6, %r3;
+ eor %r7, %r10;
+ eor %r8, %r9;
+ write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10);
+
+ pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_blowfish_arm_cfb_dec,.-_gcry_blowfish_arm_cfb_dec;
+
+.align 3
+.globl _gcry_blowfish_arm_ctr_enc;
+.type _gcry_blowfish_arm_ctr_enc,%function;
+
+_gcry_blowfish_arm_ctr_enc:
+ /* input:
+ * %r0: CTX
+ * %r1: dst (2 blocks)
+ * %r2: src (2 blocks)
+ * %r3: iv (64bit, big-endian)
+ */
+ push {%r2, %r4-%r11, %ip, %lr};
+
+ mov %lr, %r3;
+
+ /* Load IV (big => host endian) */
+ read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT0);
+
+ /* Construct IVs */
+ adds RR1, RR0, #1; /* +1 */
+ adc RL1, RL0, #0;
+ adds %r6, RR1, #1; /* +2 */
+ adc %r5, RL1, #0;
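+	/* adds/adc propagate the carry from the low word into the high word:
+	 * block 0 is encrypted with the loaded counter, block 1 with
+	 * counter+1, and counter+2 is written back below as the new IV. */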
+
+ /* Store new IV (host => big-endian) */
+ write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT0);
+
+ bl _gcry_blowfish_arm_enc_blk2;
+ /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+	/* %r1: dst, %r0: src */
+ pop {%r0};
+
+ /* XOR key-stream with plaintext */
+ read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr);
+ eor %r5, %r4;
+ eor %r6, %r3;
+ eor %r7, %r10;
+ eor %r8, %r9;
+ write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10);
+
+ pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_blowfish_arm_ctr_enc,.-_gcry_blowfish_arm_ctr_enc;
+
+.align 3
+.type _gcry_blowfish_arm_dec_blk2,%function;
+
+_gcry_blowfish_arm_dec_blk2:
+ /* input:
+ * preloaded: CTX
+ * [RL0, RR0], [RL1, RR1]: src
+ * output:
+ * [RR0, RL0], [RR1, RL1]: dst
+ */
+ add CTXs2, CTXs0, #(s2 - s0);
+ mov RMASK, #(0xff << 2); /* byte mask */
+
+ load_n_add_roundkey_dec2(17);
+ round_dec2(15, next_key);
+ round_dec2(13, next_key);
+ round_dec2(11, next_key);
+ round_dec2(9, next_key);
+ round_dec2(7, next_key);
+ round_dec2(5, next_key);
+ round_dec2(3, next_key);
+ round_dec2(1, dummy);
+
+ host_to_be(RR0, RT0);
+ host_to_be(RL0, RT0);
+ host_to_be(RR1, RT0);
+ host_to_be(RL1, RT0);
+
+ b .Ldec_cbc_tail;
+.ltorg
+.size _gcry_blowfish_arm_dec_blk2,.-_gcry_blowfish_arm_dec_blk2;
+
+.align 3
+.globl _gcry_blowfish_arm_cbc_dec;
+.type _gcry_blowfish_arm_cbc_dec,%function;
+
+_gcry_blowfish_arm_cbc_dec:
+ /* input:
+ * %r0: CTX
+ * %r1: dst (2 blocks)
+ * %r2: src (2 blocks)
+ * %r3: iv (64bit)
+ */
+ push {%r2-%r11, %ip, %lr};
+
+ read_block2(%r2, RL0, RR0, RL1, RR1, RT0);
+
+ /* dec_blk2 is only used by cbc_dec, jump directly in/out instead
+ * of function call. */
+ b _gcry_blowfish_arm_dec_blk2;
+.Ldec_cbc_tail:
+ /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+	/* %r0: src, %r1: dst, %r2: iv */
+ pop {%r0, %r2};
+
+ /* load IV+1 (src[0]) to %r7:%r8. Might be unaligned. */
+ read_block_host(%r0, 0, %r7, %r8, %r5);
+ /* load IV (iv[0]) to %r5:%r6. 'iv' is aligned. */
+ ldm %r2, {%r5, %r6};
+
+ /* out[1] ^= IV+1 */
+ eor %r10, %r7;
+ eor %r9, %r8;
+ /* out[0] ^= IV */
+ eor %r4, %r5;
+ eor %r3, %r6;
+
+ /* load IV+2 (src[1]) to %r7:%r8. Might be unaligned. */
+ read_block_host(%r0, 8, %r7, %r8, %r5);
+ /* store IV+2 to iv[0] (aligned). */
+ stm %r2, {%r7, %r8};
+
+ /* store result to dst[0-3]. Might be unaligned. */
+ write_block2_host(%r1, %r4, %r3, %r10, %r9, %r5, %r6);
+
+ pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_blowfish_arm_cbc_dec,.-_gcry_blowfish_arm_cbc_dec;
+
+#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/
+#endif /*__ARM_ARCH >= 6*/
diff --git a/comm/third_party/libgcrypt/cipher/blowfish.c b/comm/third_party/libgcrypt/cipher/blowfish.c
new file mode 100644
index 0000000000..7b001306c7
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/blowfish.c
@@ -0,0 +1,1142 @@
+/* blowfish.c - Blowfish encryption
+ * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * For a description of the algorithm, see:
+ * Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996.
+ * ISBN 0-471-11709-9. Pages 336 ff.
+ */
+
+/* Test values:
+ * key "abcdefghijklmnopqrstuvwxyz";
+ * plain "BLOWFISH"
+ * cipher 32 4E D0 FE F4 13 A2 03
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+#define BLOWFISH_BLOCKSIZE 8
+#define BLOWFISH_KEY_MIN_BITS 8
+#define BLOWFISH_KEY_MAX_BITS 576
+
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
+# if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
+# define USE_ARM_ASM 1
+# endif
+#endif
+
+typedef struct {
+ u32 s0[256];
+ u32 s1[256];
+ u32 s2[256];
+ u32 s3[256];
+ u32 p[16+2];
+} BLOWFISH_context;
+
+static gcry_err_code_t bf_setkey (void *c, const byte *key, unsigned keylen,
+ cipher_bulk_ops_t *bulk_ops);
+static unsigned int encrypt_block (void *bc, byte *outbuf, const byte *inbuf);
+static unsigned int decrypt_block (void *bc, byte *outbuf, const byte *inbuf);
+
+
+/* precomputed S boxes */
+static const u32 ks0[256] = {
+ 0xD1310BA6,0x98DFB5AC,0x2FFD72DB,0xD01ADFB7,0xB8E1AFED,0x6A267E96,
+ 0xBA7C9045,0xF12C7F99,0x24A19947,0xB3916CF7,0x0801F2E2,0x858EFC16,
+ 0x636920D8,0x71574E69,0xA458FEA3,0xF4933D7E,0x0D95748F,0x728EB658,
+ 0x718BCD58,0x82154AEE,0x7B54A41D,0xC25A59B5,0x9C30D539,0x2AF26013,
+ 0xC5D1B023,0x286085F0,0xCA417918,0xB8DB38EF,0x8E79DCB0,0x603A180E,
+ 0x6C9E0E8B,0xB01E8A3E,0xD71577C1,0xBD314B27,0x78AF2FDA,0x55605C60,
+ 0xE65525F3,0xAA55AB94,0x57489862,0x63E81440,0x55CA396A,0x2AAB10B6,
+ 0xB4CC5C34,0x1141E8CE,0xA15486AF,0x7C72E993,0xB3EE1411,0x636FBC2A,
+ 0x2BA9C55D,0x741831F6,0xCE5C3E16,0x9B87931E,0xAFD6BA33,0x6C24CF5C,
+ 0x7A325381,0x28958677,0x3B8F4898,0x6B4BB9AF,0xC4BFE81B,0x66282193,
+ 0x61D809CC,0xFB21A991,0x487CAC60,0x5DEC8032,0xEF845D5D,0xE98575B1,
+ 0xDC262302,0xEB651B88,0x23893E81,0xD396ACC5,0x0F6D6FF3,0x83F44239,
+ 0x2E0B4482,0xA4842004,0x69C8F04A,0x9E1F9B5E,0x21C66842,0xF6E96C9A,
+ 0x670C9C61,0xABD388F0,0x6A51A0D2,0xD8542F68,0x960FA728,0xAB5133A3,
+ 0x6EEF0B6C,0x137A3BE4,0xBA3BF050,0x7EFB2A98,0xA1F1651D,0x39AF0176,
+ 0x66CA593E,0x82430E88,0x8CEE8619,0x456F9FB4,0x7D84A5C3,0x3B8B5EBE,
+ 0xE06F75D8,0x85C12073,0x401A449F,0x56C16AA6,0x4ED3AA62,0x363F7706,
+ 0x1BFEDF72,0x429B023D,0x37D0D724,0xD00A1248,0xDB0FEAD3,0x49F1C09B,
+ 0x075372C9,0x80991B7B,0x25D479D8,0xF6E8DEF7,0xE3FE501A,0xB6794C3B,
+ 0x976CE0BD,0x04C006BA,0xC1A94FB6,0x409F60C4,0x5E5C9EC2,0x196A2463,
+ 0x68FB6FAF,0x3E6C53B5,0x1339B2EB,0x3B52EC6F,0x6DFC511F,0x9B30952C,
+ 0xCC814544,0xAF5EBD09,0xBEE3D004,0xDE334AFD,0x660F2807,0x192E4BB3,
+ 0xC0CBA857,0x45C8740F,0xD20B5F39,0xB9D3FBDB,0x5579C0BD,0x1A60320A,
+ 0xD6A100C6,0x402C7279,0x679F25FE,0xFB1FA3CC,0x8EA5E9F8,0xDB3222F8,
+ 0x3C7516DF,0xFD616B15,0x2F501EC8,0xAD0552AB,0x323DB5FA,0xFD238760,
+ 0x53317B48,0x3E00DF82,0x9E5C57BB,0xCA6F8CA0,0x1A87562E,0xDF1769DB,
+ 0xD542A8F6,0x287EFFC3,0xAC6732C6,0x8C4F5573,0x695B27B0,0xBBCA58C8,
+ 0xE1FFA35D,0xB8F011A0,0x10FA3D98,0xFD2183B8,0x4AFCB56C,0x2DD1D35B,
+ 0x9A53E479,0xB6F84565,0xD28E49BC,0x4BFB9790,0xE1DDF2DA,0xA4CB7E33,
+ 0x62FB1341,0xCEE4C6E8,0xEF20CADA,0x36774C01,0xD07E9EFE,0x2BF11FB4,
+ 0x95DBDA4D,0xAE909198,0xEAAD8E71,0x6B93D5A0,0xD08ED1D0,0xAFC725E0,
+ 0x8E3C5B2F,0x8E7594B7,0x8FF6E2FB,0xF2122B64,0x8888B812,0x900DF01C,
+ 0x4FAD5EA0,0x688FC31C,0xD1CFF191,0xB3A8C1AD,0x2F2F2218,0xBE0E1777,
+ 0xEA752DFE,0x8B021FA1,0xE5A0CC0F,0xB56F74E8,0x18ACF3D6,0xCE89E299,
+ 0xB4A84FE0,0xFD13E0B7,0x7CC43B81,0xD2ADA8D9,0x165FA266,0x80957705,
+ 0x93CC7314,0x211A1477,0xE6AD2065,0x77B5FA86,0xC75442F5,0xFB9D35CF,
+ 0xEBCDAF0C,0x7B3E89A0,0xD6411BD3,0xAE1E7E49,0x00250E2D,0x2071B35E,
+ 0x226800BB,0x57B8E0AF,0x2464369B,0xF009B91E,0x5563911D,0x59DFA6AA,
+ 0x78C14389,0xD95A537F,0x207D5BA2,0x02E5B9C5,0x83260376,0x6295CFA9,
+ 0x11C81968,0x4E734A41,0xB3472DCA,0x7B14A94A,0x1B510052,0x9A532915,
+ 0xD60F573F,0xBC9BC6E4,0x2B60A476,0x81E67400,0x08BA6FB5,0x571BE91F,
+ 0xF296EC6B,0x2A0DD915,0xB6636521,0xE7B9F9B6,0xFF34052E,0xC5855664,
+ 0x53B02D5D,0xA99F8FA1,0x08BA4799,0x6E85076A };
+
+static const u32 ks1[256] = {
+ 0x4B7A70E9,0xB5B32944,0xDB75092E,0xC4192623,0xAD6EA6B0,0x49A7DF7D,
+ 0x9CEE60B8,0x8FEDB266,0xECAA8C71,0x699A17FF,0x5664526C,0xC2B19EE1,
+ 0x193602A5,0x75094C29,0xA0591340,0xE4183A3E,0x3F54989A,0x5B429D65,
+ 0x6B8FE4D6,0x99F73FD6,0xA1D29C07,0xEFE830F5,0x4D2D38E6,0xF0255DC1,
+ 0x4CDD2086,0x8470EB26,0x6382E9C6,0x021ECC5E,0x09686B3F,0x3EBAEFC9,
+ 0x3C971814,0x6B6A70A1,0x687F3584,0x52A0E286,0xB79C5305,0xAA500737,
+ 0x3E07841C,0x7FDEAE5C,0x8E7D44EC,0x5716F2B8,0xB03ADA37,0xF0500C0D,
+ 0xF01C1F04,0x0200B3FF,0xAE0CF51A,0x3CB574B2,0x25837A58,0xDC0921BD,
+ 0xD19113F9,0x7CA92FF6,0x94324773,0x22F54701,0x3AE5E581,0x37C2DADC,
+ 0xC8B57634,0x9AF3DDA7,0xA9446146,0x0FD0030E,0xECC8C73E,0xA4751E41,
+ 0xE238CD99,0x3BEA0E2F,0x3280BBA1,0x183EB331,0x4E548B38,0x4F6DB908,
+ 0x6F420D03,0xF60A04BF,0x2CB81290,0x24977C79,0x5679B072,0xBCAF89AF,
+ 0xDE9A771F,0xD9930810,0xB38BAE12,0xDCCF3F2E,0x5512721F,0x2E6B7124,
+ 0x501ADDE6,0x9F84CD87,0x7A584718,0x7408DA17,0xBC9F9ABC,0xE94B7D8C,
+ 0xEC7AEC3A,0xDB851DFA,0x63094366,0xC464C3D2,0xEF1C1847,0x3215D908,
+ 0xDD433B37,0x24C2BA16,0x12A14D43,0x2A65C451,0x50940002,0x133AE4DD,
+ 0x71DFF89E,0x10314E55,0x81AC77D6,0x5F11199B,0x043556F1,0xD7A3C76B,
+ 0x3C11183B,0x5924A509,0xF28FE6ED,0x97F1FBFA,0x9EBABF2C,0x1E153C6E,
+ 0x86E34570,0xEAE96FB1,0x860E5E0A,0x5A3E2AB3,0x771FE71C,0x4E3D06FA,
+ 0x2965DCB9,0x99E71D0F,0x803E89D6,0x5266C825,0x2E4CC978,0x9C10B36A,
+ 0xC6150EBA,0x94E2EA78,0xA5FC3C53,0x1E0A2DF4,0xF2F74EA7,0x361D2B3D,
+ 0x1939260F,0x19C27960,0x5223A708,0xF71312B6,0xEBADFE6E,0xEAC31F66,
+ 0xE3BC4595,0xA67BC883,0xB17F37D1,0x018CFF28,0xC332DDEF,0xBE6C5AA5,
+ 0x65582185,0x68AB9802,0xEECEA50F,0xDB2F953B,0x2AEF7DAD,0x5B6E2F84,
+ 0x1521B628,0x29076170,0xECDD4775,0x619F1510,0x13CCA830,0xEB61BD96,
+ 0x0334FE1E,0xAA0363CF,0xB5735C90,0x4C70A239,0xD59E9E0B,0xCBAADE14,
+ 0xEECC86BC,0x60622CA7,0x9CAB5CAB,0xB2F3846E,0x648B1EAF,0x19BDF0CA,
+ 0xA02369B9,0x655ABB50,0x40685A32,0x3C2AB4B3,0x319EE9D5,0xC021B8F7,
+ 0x9B540B19,0x875FA099,0x95F7997E,0x623D7DA8,0xF837889A,0x97E32D77,
+ 0x11ED935F,0x16681281,0x0E358829,0xC7E61FD6,0x96DEDFA1,0x7858BA99,
+ 0x57F584A5,0x1B227263,0x9B83C3FF,0x1AC24696,0xCDB30AEB,0x532E3054,
+ 0x8FD948E4,0x6DBC3128,0x58EBF2EF,0x34C6FFEA,0xFE28ED61,0xEE7C3C73,
+ 0x5D4A14D9,0xE864B7E3,0x42105D14,0x203E13E0,0x45EEE2B6,0xA3AAABEA,
+ 0xDB6C4F15,0xFACB4FD0,0xC742F442,0xEF6ABBB5,0x654F3B1D,0x41CD2105,
+ 0xD81E799E,0x86854DC7,0xE44B476A,0x3D816250,0xCF62A1F2,0x5B8D2646,
+ 0xFC8883A0,0xC1C7B6A3,0x7F1524C3,0x69CB7492,0x47848A0B,0x5692B285,
+ 0x095BBF00,0xAD19489D,0x1462B174,0x23820E00,0x58428D2A,0x0C55F5EA,
+ 0x1DADF43E,0x233F7061,0x3372F092,0x8D937E41,0xD65FECF1,0x6C223BDB,
+ 0x7CDE3759,0xCBEE7460,0x4085F2A7,0xCE77326E,0xA6078084,0x19F8509E,
+ 0xE8EFD855,0x61D99735,0xA969A7AA,0xC50C06C2,0x5A04ABFC,0x800BCADC,
+ 0x9E447A2E,0xC3453484,0xFDD56705,0x0E1E9EC9,0xDB73DBD3,0x105588CD,
+ 0x675FDA79,0xE3674340,0xC5C43465,0x713E38D8,0x3D28F89E,0xF16DFF20,
+ 0x153E21E7,0x8FB03D4A,0xE6E39F2B,0xDB83ADF7 };
+
+static const u32 ks2[256] = {
+ 0xE93D5A68,0x948140F7,0xF64C261C,0x94692934,0x411520F7,0x7602D4F7,
+ 0xBCF46B2E,0xD4A20068,0xD4082471,0x3320F46A,0x43B7D4B7,0x500061AF,
+ 0x1E39F62E,0x97244546,0x14214F74,0xBF8B8840,0x4D95FC1D,0x96B591AF,
+ 0x70F4DDD3,0x66A02F45,0xBFBC09EC,0x03BD9785,0x7FAC6DD0,0x31CB8504,
+ 0x96EB27B3,0x55FD3941,0xDA2547E6,0xABCA0A9A,0x28507825,0x530429F4,
+ 0x0A2C86DA,0xE9B66DFB,0x68DC1462,0xD7486900,0x680EC0A4,0x27A18DEE,
+ 0x4F3FFEA2,0xE887AD8C,0xB58CE006,0x7AF4D6B6,0xAACE1E7C,0xD3375FEC,
+ 0xCE78A399,0x406B2A42,0x20FE9E35,0xD9F385B9,0xEE39D7AB,0x3B124E8B,
+ 0x1DC9FAF7,0x4B6D1856,0x26A36631,0xEAE397B2,0x3A6EFA74,0xDD5B4332,
+ 0x6841E7F7,0xCA7820FB,0xFB0AF54E,0xD8FEB397,0x454056AC,0xBA489527,
+ 0x55533A3A,0x20838D87,0xFE6BA9B7,0xD096954B,0x55A867BC,0xA1159A58,
+ 0xCCA92963,0x99E1DB33,0xA62A4A56,0x3F3125F9,0x5EF47E1C,0x9029317C,
+ 0xFDF8E802,0x04272F70,0x80BB155C,0x05282CE3,0x95C11548,0xE4C66D22,
+ 0x48C1133F,0xC70F86DC,0x07F9C9EE,0x41041F0F,0x404779A4,0x5D886E17,
+ 0x325F51EB,0xD59BC0D1,0xF2BCC18F,0x41113564,0x257B7834,0x602A9C60,
+ 0xDFF8E8A3,0x1F636C1B,0x0E12B4C2,0x02E1329E,0xAF664FD1,0xCAD18115,
+ 0x6B2395E0,0x333E92E1,0x3B240B62,0xEEBEB922,0x85B2A20E,0xE6BA0D99,
+ 0xDE720C8C,0x2DA2F728,0xD0127845,0x95B794FD,0x647D0862,0xE7CCF5F0,
+ 0x5449A36F,0x877D48FA,0xC39DFD27,0xF33E8D1E,0x0A476341,0x992EFF74,
+ 0x3A6F6EAB,0xF4F8FD37,0xA812DC60,0xA1EBDDF8,0x991BE14C,0xDB6E6B0D,
+ 0xC67B5510,0x6D672C37,0x2765D43B,0xDCD0E804,0xF1290DC7,0xCC00FFA3,
+ 0xB5390F92,0x690FED0B,0x667B9FFB,0xCEDB7D9C,0xA091CF0B,0xD9155EA3,
+ 0xBB132F88,0x515BAD24,0x7B9479BF,0x763BD6EB,0x37392EB3,0xCC115979,
+ 0x8026E297,0xF42E312D,0x6842ADA7,0xC66A2B3B,0x12754CCC,0x782EF11C,
+ 0x6A124237,0xB79251E7,0x06A1BBE6,0x4BFB6350,0x1A6B1018,0x11CAEDFA,
+ 0x3D25BDD8,0xE2E1C3C9,0x44421659,0x0A121386,0xD90CEC6E,0xD5ABEA2A,
+ 0x64AF674E,0xDA86A85F,0xBEBFE988,0x64E4C3FE,0x9DBC8057,0xF0F7C086,
+ 0x60787BF8,0x6003604D,0xD1FD8346,0xF6381FB0,0x7745AE04,0xD736FCCC,
+ 0x83426B33,0xF01EAB71,0xB0804187,0x3C005E5F,0x77A057BE,0xBDE8AE24,
+ 0x55464299,0xBF582E61,0x4E58F48F,0xF2DDFDA2,0xF474EF38,0x8789BDC2,
+ 0x5366F9C3,0xC8B38E74,0xB475F255,0x46FCD9B9,0x7AEB2661,0x8B1DDF84,
+ 0x846A0E79,0x915F95E2,0x466E598E,0x20B45770,0x8CD55591,0xC902DE4C,
+ 0xB90BACE1,0xBB8205D0,0x11A86248,0x7574A99E,0xB77F19B6,0xE0A9DC09,
+ 0x662D09A1,0xC4324633,0xE85A1F02,0x09F0BE8C,0x4A99A025,0x1D6EFE10,
+ 0x1AB93D1D,0x0BA5A4DF,0xA186F20F,0x2868F169,0xDCB7DA83,0x573906FE,
+ 0xA1E2CE9B,0x4FCD7F52,0x50115E01,0xA70683FA,0xA002B5C4,0x0DE6D027,
+ 0x9AF88C27,0x773F8641,0xC3604C06,0x61A806B5,0xF0177A28,0xC0F586E0,
+ 0x006058AA,0x30DC7D62,0x11E69ED7,0x2338EA63,0x53C2DD94,0xC2C21634,
+ 0xBBCBEE56,0x90BCB6DE,0xEBFC7DA1,0xCE591D76,0x6F05E409,0x4B7C0188,
+ 0x39720A3D,0x7C927C24,0x86E3725F,0x724D9DB9,0x1AC15BB4,0xD39EB8FC,
+ 0xED545578,0x08FCA5B5,0xD83D7CD3,0x4DAD0FC4,0x1E50EF5E,0xB161E6F8,
+ 0xA28514D9,0x6C51133C,0x6FD5C7E7,0x56E14EC4,0x362ABFCE,0xDDC6C837,
+ 0xD79A3234,0x92638212,0x670EFA8E,0x406000E0 };
+
+static const u32 ks3[256] = {
+ 0x3A39CE37,0xD3FAF5CF,0xABC27737,0x5AC52D1B,0x5CB0679E,0x4FA33742,
+ 0xD3822740,0x99BC9BBE,0xD5118E9D,0xBF0F7315,0xD62D1C7E,0xC700C47B,
+ 0xB78C1B6B,0x21A19045,0xB26EB1BE,0x6A366EB4,0x5748AB2F,0xBC946E79,
+ 0xC6A376D2,0x6549C2C8,0x530FF8EE,0x468DDE7D,0xD5730A1D,0x4CD04DC6,
+ 0x2939BBDB,0xA9BA4650,0xAC9526E8,0xBE5EE304,0xA1FAD5F0,0x6A2D519A,
+ 0x63EF8CE2,0x9A86EE22,0xC089C2B8,0x43242EF6,0xA51E03AA,0x9CF2D0A4,
+ 0x83C061BA,0x9BE96A4D,0x8FE51550,0xBA645BD6,0x2826A2F9,0xA73A3AE1,
+ 0x4BA99586,0xEF5562E9,0xC72FEFD3,0xF752F7DA,0x3F046F69,0x77FA0A59,
+ 0x80E4A915,0x87B08601,0x9B09E6AD,0x3B3EE593,0xE990FD5A,0x9E34D797,
+ 0x2CF0B7D9,0x022B8B51,0x96D5AC3A,0x017DA67D,0xD1CF3ED6,0x7C7D2D28,
+ 0x1F9F25CF,0xADF2B89B,0x5AD6B472,0x5A88F54C,0xE029AC71,0xE019A5E6,
+ 0x47B0ACFD,0xED93FA9B,0xE8D3C48D,0x283B57CC,0xF8D56629,0x79132E28,
+ 0x785F0191,0xED756055,0xF7960E44,0xE3D35E8C,0x15056DD4,0x88F46DBA,
+ 0x03A16125,0x0564F0BD,0xC3EB9E15,0x3C9057A2,0x97271AEC,0xA93A072A,
+ 0x1B3F6D9B,0x1E6321F5,0xF59C66FB,0x26DCF319,0x7533D928,0xB155FDF5,
+ 0x03563482,0x8ABA3CBB,0x28517711,0xC20AD9F8,0xABCC5167,0xCCAD925F,
+ 0x4DE81751,0x3830DC8E,0x379D5862,0x9320F991,0xEA7A90C2,0xFB3E7BCE,
+ 0x5121CE64,0x774FBE32,0xA8B6E37E,0xC3293D46,0x48DE5369,0x6413E680,
+ 0xA2AE0810,0xDD6DB224,0x69852DFD,0x09072166,0xB39A460A,0x6445C0DD,
+ 0x586CDECF,0x1C20C8AE,0x5BBEF7DD,0x1B588D40,0xCCD2017F,0x6BB4E3BB,
+ 0xDDA26A7E,0x3A59FF45,0x3E350A44,0xBCB4CDD5,0x72EACEA8,0xFA6484BB,
+ 0x8D6612AE,0xBF3C6F47,0xD29BE463,0x542F5D9E,0xAEC2771B,0xF64E6370,
+ 0x740E0D8D,0xE75B1357,0xF8721671,0xAF537D5D,0x4040CB08,0x4EB4E2CC,
+ 0x34D2466A,0x0115AF84,0xE1B00428,0x95983A1D,0x06B89FB4,0xCE6EA048,
+ 0x6F3F3B82,0x3520AB82,0x011A1D4B,0x277227F8,0x611560B1,0xE7933FDC,
+ 0xBB3A792B,0x344525BD,0xA08839E1,0x51CE794B,0x2F32C9B7,0xA01FBAC9,
+ 0xE01CC87E,0xBCC7D1F6,0xCF0111C3,0xA1E8AAC7,0x1A908749,0xD44FBD9A,
+ 0xD0DADECB,0xD50ADA38,0x0339C32A,0xC6913667,0x8DF9317C,0xE0B12B4F,
+ 0xF79E59B7,0x43F5BB3A,0xF2D519FF,0x27D9459C,0xBF97222C,0x15E6FC2A,
+ 0x0F91FC71,0x9B941525,0xFAE59361,0xCEB69CEB,0xC2A86459,0x12BAA8D1,
+ 0xB6C1075E,0xE3056A0C,0x10D25065,0xCB03A442,0xE0EC6E0E,0x1698DB3B,
+ 0x4C98A0BE,0x3278E964,0x9F1F9532,0xE0D392DF,0xD3A0342B,0x8971F21E,
+ 0x1B0A7441,0x4BA3348C,0xC5BE7120,0xC37632D8,0xDF359F8D,0x9B992F2E,
+ 0xE60B6F47,0x0FE3F11D,0xE54CDA54,0x1EDAD891,0xCE6279CF,0xCD3E7E6F,
+ 0x1618B166,0xFD2C1D05,0x848FD2C5,0xF6FB2299,0xF523F357,0xA6327623,
+ 0x93A83531,0x56CCCD02,0xACF08162,0x5A75EBB5,0x6E163697,0x88D273CC,
+ 0xDE966292,0x81B949D0,0x4C50901B,0x71C65614,0xE6C6C7BD,0x327A140A,
+ 0x45E1D006,0xC3F27B9A,0xC9AA53FD,0x62A80F00,0xBB25BFE2,0x35BDD2F6,
+ 0x71126905,0xB2040222,0xB6CBCF7C,0xCD769C2B,0x53113EC0,0x1640E3D3,
+ 0x38ABBD60,0x2547ADF0,0xBA38209C,0xF746CE76,0x77AFA1C5,0x20756060,
+ 0x85CBFE4E,0x8AE88DD8,0x7AAAF9B0,0x4CF9AA7E,0x1948C25C,0x02FB8A8C,
+ 0x01C36AE4,0xD6EBE1F9,0x90D4F869,0xA65CDEA0,0x3F09252D,0xC208E69F,
+ 0xB74E6132,0xCE77E25B,0x578FDFE3,0x3AC372E6 };
+
+static const u32 ps[16+2] = {
+ 0x243F6A88,0x85A308D3,0x13198A2E,0x03707344,0xA4093822,0x299F31D0,
+ 0x082EFA98,0xEC4E6C89,0x452821E6,0x38D01377,0xBE5466CF,0x34E90C6C,
+ 0xC0AC29B7,0xC97C50DD,0x3F84D5B5,0xB5470917,0x9216D5D9,0x8979FB1B };
+
+
+#ifdef USE_AMD64_ASM
+
+/* Assembly implementations of Blowfish. */
+extern void _gcry_blowfish_amd64_do_encrypt(BLOWFISH_context *c, u32 *ret_xl,
+ u32 *ret_xr);
+
+extern void _gcry_blowfish_amd64_encrypt_block(BLOWFISH_context *c, byte *out,
+ const byte *in);
+
+extern void _gcry_blowfish_amd64_decrypt_block(BLOWFISH_context *c, byte *out,
+ const byte *in);
+
+/* These assembly implementations process four blocks in parallel. */
+extern void _gcry_blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out,
+ const byte *in, byte *ctr);
+
+extern void _gcry_blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out,
+ const byte *in, byte *iv);
+
+extern void _gcry_blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out,
+ const byte *in, byte *iv);
+
+static void
+do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
+{
+ _gcry_blowfish_amd64_do_encrypt (bc, ret_xl, ret_xr);
+}
+
+static void
+do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
+{
+ _gcry_blowfish_amd64_encrypt_block (context, outbuf, inbuf);
+}
+
+static void
+do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
+{
+ _gcry_blowfish_amd64_decrypt_block (context, outbuf, inbuf);
+}
+
+static inline void
+blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in,
+ byte *ctr)
+{
+ _gcry_blowfish_amd64_ctr_enc(ctx, out, in, ctr);
+}
+
+static inline void
+blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
+ byte *iv)
+{
+ _gcry_blowfish_amd64_cbc_dec(ctx, out, in, iv);
+}
+
+static inline void
+blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
+ byte *iv)
+{
+ _gcry_blowfish_amd64_cfb_dec(ctx, out, in, iv);
+}
+
+static unsigned int
+encrypt_block (void *context , byte *outbuf, const byte *inbuf)
+{
+ BLOWFISH_context *c = (BLOWFISH_context *) context;
+ do_encrypt_block (c, outbuf, inbuf);
+ return /*burn_stack*/ (2*8);
+}
+
+static unsigned int
+decrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+ BLOWFISH_context *c = (BLOWFISH_context *) context;
+ do_decrypt_block (c, outbuf, inbuf);
+ return /*burn_stack*/ (2*8);
+}
+
+#elif defined(USE_ARM_ASM)
+
+/* Assembly implementations of Blowfish. */
+extern void _gcry_blowfish_arm_do_encrypt(BLOWFISH_context *c, u32 *ret_xl,
+ u32 *ret_xr);
+
+extern void _gcry_blowfish_arm_encrypt_block(BLOWFISH_context *c, byte *out,
+ const byte *in);
+
+extern void _gcry_blowfish_arm_decrypt_block(BLOWFISH_context *c, byte *out,
+ const byte *in);
+
+/* These assembly implementations process two blocks in parallel. */
+extern void _gcry_blowfish_arm_ctr_enc(BLOWFISH_context *ctx, byte *out,
+ const byte *in, byte *ctr);
+
+extern void _gcry_blowfish_arm_cbc_dec(BLOWFISH_context *ctx, byte *out,
+ const byte *in, byte *iv);
+
+extern void _gcry_blowfish_arm_cfb_dec(BLOWFISH_context *ctx, byte *out,
+ const byte *in, byte *iv);
+
+static void
+do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
+{
+ _gcry_blowfish_arm_do_encrypt (bc, ret_xl, ret_xr);
+}
+
+static void
+do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
+{
+ _gcry_blowfish_arm_encrypt_block (context, outbuf, inbuf);
+}
+
+static void
+do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
+{
+ _gcry_blowfish_arm_decrypt_block (context, outbuf, inbuf);
+}
+
+static unsigned int
+encrypt_block (void *context , byte *outbuf, const byte *inbuf)
+{
+ BLOWFISH_context *c = (BLOWFISH_context *) context;
+ do_encrypt_block (c, outbuf, inbuf);
+ return /*burn_stack*/ (10*4);
+}
+
+static unsigned int
+decrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+ BLOWFISH_context *c = (BLOWFISH_context *) context;
+ do_decrypt_block (c, outbuf, inbuf);
+ return /*burn_stack*/ (10*4);
+}
+
+#else /*USE_ARM_ASM*/
+
+
+#define F(x) ((( s0[(x)>>24] + s1[((x)>>16)&0xff]) \
+ ^ s2[((x)>>8)&0xff]) + s3[(x)&0xff] )
+#define R(l,r,i) do { l ^= p[i]; r ^= F(l); } while(0)
+#define R3(l,r,i) do { R(l##0,r##0,i);R(l##1,r##1,i);R(l##2,r##2,i);} while(0)
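+
+/* F() is the Blowfish round function: the four bytes of the 32-bit input
+   each select one S-box entry, combined as ((s0 + s1) ^ s2) + s3 with
+   32-bit wrap-around.  R() applies one Feistel round: XOR the round key
+   p[i] into one half, then XOR F() of that half into the other half.
+   R3() runs the same round on three independent blocks for the bulk
+   code paths below. */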
+
+
+static void
+do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
+{
+ u32 xl, xr, *s0, *s1, *s2, *s3, *p;
+
+ xl = *ret_xl;
+ xr = *ret_xr;
+ p = bc->p;
+ s0 = bc->s0;
+ s1 = bc->s1;
+ s2 = bc->s2;
+ s3 = bc->s3;
+
+ R( xl, xr, 0);
+ R( xr, xl, 1);
+ R( xl, xr, 2);
+ R( xr, xl, 3);
+ R( xl, xr, 4);
+ R( xr, xl, 5);
+ R( xl, xr, 6);
+ R( xr, xl, 7);
+ R( xl, xr, 8);
+ R( xr, xl, 9);
+ R( xl, xr, 10);
+ R( xr, xl, 11);
+ R( xl, xr, 12);
+ R( xr, xl, 13);
+ R( xl, xr, 14);
+ R( xr, xl, 15);
+
+ xl ^= p[16];
+ xr ^= p[16+1];
+
+ *ret_xl = xr;
+ *ret_xr = xl;
+}
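+
+/* After the 16 rounds the two halves are whitened with the last two
+   subkeys p[16] and p[17] and returned swapped, as in the Blowfish
+   specification. */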
+
+
+static void
+do_encrypt_3 ( BLOWFISH_context *bc, byte *dst, const byte *src )
+{
+ u32 xl0, xr0, xl1, xr1, xl2, xr2, *s0, *s1, *s2, *s3, *p;
+
+ xl0 = buf_get_be32(src + 0);
+ xr0 = buf_get_be32(src + 4);
+ xl1 = buf_get_be32(src + 8);
+ xr1 = buf_get_be32(src + 12);
+ xl2 = buf_get_be32(src + 16);
+ xr2 = buf_get_be32(src + 20);
+ p = bc->p;
+ s0 = bc->s0;
+ s1 = bc->s1;
+ s2 = bc->s2;
+ s3 = bc->s3;
+
+ R3( xl, xr, 0);
+ R3( xr, xl, 1);
+ R3( xl, xr, 2);
+ R3( xr, xl, 3);
+ R3( xl, xr, 4);
+ R3( xr, xl, 5);
+ R3( xl, xr, 6);
+ R3( xr, xl, 7);
+ R3( xl, xr, 8);
+ R3( xr, xl, 9);
+ R3( xl, xr, 10);
+ R3( xr, xl, 11);
+ R3( xl, xr, 12);
+ R3( xr, xl, 13);
+ R3( xl, xr, 14);
+ R3( xr, xl, 15);
+
+ xl0 ^= p[16];
+ xr0 ^= p[16+1];
+ xl1 ^= p[16];
+ xr1 ^= p[16+1];
+ xl2 ^= p[16];
+ xr2 ^= p[16+1];
+
+ buf_put_be32(dst + 0, xr0);
+ buf_put_be32(dst + 4, xl0);
+ buf_put_be32(dst + 8, xr1);
+ buf_put_be32(dst + 12, xl1);
+ buf_put_be32(dst + 16, xr2);
+ buf_put_be32(dst + 20, xl2);
+}
+
+
+static void
+decrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
+{
+ u32 xl, xr, *s0, *s1, *s2, *s3, *p;
+
+ xl = *ret_xl;
+ xr = *ret_xr;
+ p = bc->p;
+ s0 = bc->s0;
+ s1 = bc->s1;
+ s2 = bc->s2;
+ s3 = bc->s3;
+
+ R( xl, xr, 17);
+ R( xr, xl, 16);
+ R( xl, xr, 15);
+ R( xr, xl, 14);
+ R( xl, xr, 13);
+ R( xr, xl, 12);
+ R( xl, xr, 11);
+ R( xr, xl, 10);
+ R( xl, xr, 9);
+ R( xr, xl, 8);
+ R( xl, xr, 7);
+ R( xr, xl, 6);
+ R( xl, xr, 5);
+ R( xr, xl, 4);
+ R( xl, xr, 3);
+ R( xr, xl, 2);
+
+ xl ^= p[1];
+ xr ^= p[0];
+
+ *ret_xl = xr;
+ *ret_xr = xl;
+}
+
+
+static void
+do_decrypt_3 ( BLOWFISH_context *bc, byte *dst, const byte *src )
+{
+ u32 xl0, xr0, xl1, xr1, xl2, xr2, *s0, *s1, *s2, *s3, *p;
+
+ xl0 = buf_get_be32(src + 0);
+ xr0 = buf_get_be32(src + 4);
+ xl1 = buf_get_be32(src + 8);
+ xr1 = buf_get_be32(src + 12);
+ xl2 = buf_get_be32(src + 16);
+ xr2 = buf_get_be32(src + 20);
+ p = bc->p;
+ s0 = bc->s0;
+ s1 = bc->s1;
+ s2 = bc->s2;
+ s3 = bc->s3;
+
+ R3( xl, xr, 17);
+ R3( xr, xl, 16);
+ R3( xl, xr, 15);
+ R3( xr, xl, 14);
+ R3( xl, xr, 13);
+ R3( xr, xl, 12);
+ R3( xl, xr, 11);
+ R3( xr, xl, 10);
+ R3( xl, xr, 9);
+ R3( xr, xl, 8);
+ R3( xl, xr, 7);
+ R3( xr, xl, 6);
+ R3( xl, xr, 5);
+ R3( xr, xl, 4);
+ R3( xl, xr, 3);
+ R3( xr, xl, 2);
+
+ xl0 ^= p[1];
+ xr0 ^= p[0];
+ xl1 ^= p[1];
+ xr1 ^= p[0];
+ xl2 ^= p[1];
+ xr2 ^= p[0];
+
+ buf_put_be32(dst + 0, xr0);
+ buf_put_be32(dst + 4, xl0);
+ buf_put_be32(dst + 8, xr1);
+ buf_put_be32(dst + 12, xl1);
+ buf_put_be32(dst + 16, xr2);
+ buf_put_be32(dst + 20, xl2);
+}
+
+#undef F
+#undef R
+#undef R3
+
+static void
+do_encrypt_block ( BLOWFISH_context *bc, byte *outbuf, const byte *inbuf )
+{
+ u32 d1, d2;
+
+ d1 = buf_get_be32(inbuf);
+ d2 = buf_get_be32(inbuf + 4);
+ do_encrypt( bc, &d1, &d2 );
+ buf_put_be32(outbuf, d1);
+ buf_put_be32(outbuf + 4, d2);
+}
+
+static unsigned int
+encrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+ BLOWFISH_context *bc = (BLOWFISH_context *) context;
+ do_encrypt_block (bc, outbuf, inbuf);
+ return /*burn_stack*/ (64);
+}
+
+
+static void
+do_decrypt_block (BLOWFISH_context *bc, byte *outbuf, const byte *inbuf)
+{
+ u32 d1, d2;
+
+ d1 = buf_get_be32(inbuf);
+ d2 = buf_get_be32(inbuf + 4);
+ decrypt( bc, &d1, &d2 );
+ buf_put_be32(outbuf, d1);
+ buf_put_be32(outbuf + 4, d2);
+}
+
+static unsigned int
+decrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+ BLOWFISH_context *bc = (BLOWFISH_context *) context;
+ do_decrypt_block (bc, outbuf, inbuf);
+ return /*burn_stack*/ (64);
+}
+
+#endif /*!USE_AMD64_ASM&&!USE_ARM_ASM*/
+
+
+/* Bulk encryption of complete blocks in CTR mode. This function is only
+ intended for the bulk encryption feature of cipher.c. CTR is expected to be
+ of size BLOWFISH_BLOCKSIZE. */
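+/* In CTR mode each counter block is encrypted and XORed with the input:
+     out[i] = in[i] ^ E_k(ctr + i)
+   so encryption and decryption are the same operation.  The counter is a
+   BLOWFISH_BLOCKSIZE-byte big-endian integer and is advanced by one for
+   each block processed. */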
+static void
+_gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ BLOWFISH_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char tmpbuf[BLOWFISH_BLOCKSIZE * 3];
+ int burn_stack_depth = (64) + 4 * BLOWFISH_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+ {
+ if (nblocks >= 4)
+ burn_stack_depth += 5 * sizeof(void*);
+
+ /* Process data in 4 block chunks. */
+ while (nblocks >= 4)
+ {
+ blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 4;
+ outbuf += 4 * BLOWFISH_BLOCKSIZE;
+ inbuf += 4 * BLOWFISH_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#elif defined(USE_ARM_ASM)
+ {
+ /* Process data in 2 block chunks. */
+ while (nblocks >= 2)
+ {
+ _gcry_blowfish_arm_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 2;
+ outbuf += 2 * BLOWFISH_BLOCKSIZE;
+ inbuf += 2 * BLOWFISH_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
+ for ( ;nblocks >= 3; nblocks -= 3)
+ {
+ /* Prepare the counter blocks. */
+ cipher_block_cpy (tmpbuf + 0, ctr, BLOWFISH_BLOCKSIZE);
+ cipher_block_cpy (tmpbuf + 8, ctr, BLOWFISH_BLOCKSIZE);
+ cipher_block_cpy (tmpbuf + 16, ctr, BLOWFISH_BLOCKSIZE);
+ cipher_block_add (tmpbuf + 8, 1, BLOWFISH_BLOCKSIZE);
+ cipher_block_add (tmpbuf + 16, 2, BLOWFISH_BLOCKSIZE);
+ cipher_block_add (ctr, 3, BLOWFISH_BLOCKSIZE);
+ /* Encrypt the counter. */
+ do_encrypt_3(ctx, tmpbuf, tmpbuf);
+ /* XOR the input with the encrypted counter and store in output. */
+ buf_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE * 3);
+ outbuf += BLOWFISH_BLOCKSIZE * 3;
+ inbuf += BLOWFISH_BLOCKSIZE * 3;
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* Encrypt the counter. */
+ do_encrypt_block(ctx, tmpbuf, ctr);
+ /* XOR the input with the encrypted counter and store in output. */
+ cipher_block_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE);
+ outbuf += BLOWFISH_BLOCKSIZE;
+ inbuf += BLOWFISH_BLOCKSIZE;
+ /* Increment the counter. */
+ cipher_block_add (ctr, 1, BLOWFISH_BLOCKSIZE);
+ }
+
+ wipememory(tmpbuf, sizeof(tmpbuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CBC mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
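+/* CBC decryption computes out[i] = D_k(in[i]) ^ in[i-1], with the IV in
+   place of in[-1].  Because INBUF and OUTBUF may alias, each decrypted
+   block is first written to SAVEBUF before the XOR-and-copy step. */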
+static void
+_gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ BLOWFISH_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char savebuf[BLOWFISH_BLOCKSIZE * 3];
+ int burn_stack_depth = (64) + 4 * BLOWFISH_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+ {
+ if (nblocks >= 4)
+ burn_stack_depth += 5 * sizeof(void*);
+
+ /* Process data in 4 block chunks. */
+ while (nblocks >= 4)
+ {
+ blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 4;
+ outbuf += 4 * BLOWFISH_BLOCKSIZE;
+ inbuf += 4 * BLOWFISH_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#elif defined(USE_ARM_ASM)
+ {
+ /* Process data in 2 block chunks. */
+ while (nblocks >= 2)
+ {
+ _gcry_blowfish_arm_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 2;
+ outbuf += 2 * BLOWFISH_BLOCKSIZE;
+ inbuf += 2 * BLOWFISH_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
+ for ( ;nblocks >= 3; nblocks -= 3)
+ {
+ /* INBUF is needed later and it may be identical to OUTBUF, so store
+ the intermediate result to SAVEBUF. */
+ do_decrypt_3 (ctx, savebuf, inbuf);
+
+ cipher_block_xor_1 (savebuf + 0, iv, BLOWFISH_BLOCKSIZE);
+ cipher_block_xor_1 (savebuf + 8, inbuf, BLOWFISH_BLOCKSIZE * 2);
+ cipher_block_cpy (iv, inbuf + 16, BLOWFISH_BLOCKSIZE);
+ buf_cpy (outbuf, savebuf, BLOWFISH_BLOCKSIZE * 3);
+ inbuf += BLOWFISH_BLOCKSIZE * 3;
+ outbuf += BLOWFISH_BLOCKSIZE * 3;
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* INBUF is needed later and it may be identical to OUTBUF, so store
+ the intermediate result to SAVEBUF. */
+ do_decrypt_block (ctx, savebuf, inbuf);
+
+ cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
+ inbuf += BLOWFISH_BLOCKSIZE;
+ outbuf += BLOWFISH_BLOCKSIZE;
+ }
+
+ wipememory(savebuf, sizeof(savebuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CFB mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
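+/* CFB decryption re-encrypts the previous ciphertext block (initially the
+   IV) and XORs it with the current ciphertext:
+     out[i] = in[i] ^ E_k(c[i-1])
+   so only the block cipher's forward direction is needed. */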
+static void
+_gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ BLOWFISH_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char tmpbuf[BLOWFISH_BLOCKSIZE * 3];
+ int burn_stack_depth = (64) + 4 * BLOWFISH_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+ {
+ if (nblocks >= 4)
+ burn_stack_depth += 5 * sizeof(void*);
+
+ /* Process data in 4 block chunks. */
+ while (nblocks >= 4)
+ {
+ blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 4;
+ outbuf += 4 * BLOWFISH_BLOCKSIZE;
+ inbuf += 4 * BLOWFISH_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#elif defined(USE_ARM_ASM)
+ {
+ /* Process data in 2 block chunks. */
+ while (nblocks >= 2)
+ {
+ _gcry_blowfish_arm_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 2;
+ outbuf += 2 * BLOWFISH_BLOCKSIZE;
+ inbuf += 2 * BLOWFISH_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
+ for ( ;nblocks >= 3; nblocks -= 3 )
+ {
+ cipher_block_cpy (tmpbuf + 0, iv, BLOWFISH_BLOCKSIZE);
+ cipher_block_cpy (tmpbuf + 8, inbuf + 0, BLOWFISH_BLOCKSIZE * 2);
+ cipher_block_cpy (iv, inbuf + 16, BLOWFISH_BLOCKSIZE);
+ do_encrypt_3 (ctx, tmpbuf, tmpbuf);
+ buf_xor (outbuf, inbuf, tmpbuf, BLOWFISH_BLOCKSIZE * 3);
+ outbuf += BLOWFISH_BLOCKSIZE * 3;
+ inbuf += BLOWFISH_BLOCKSIZE * 3;
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ do_encrypt_block(ctx, iv, iv);
+ cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
+ outbuf += BLOWFISH_BLOCKSIZE;
+ inbuf += BLOWFISH_BLOCKSIZE;
+ }
+
+ wipememory(tmpbuf, sizeof(tmpbuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Run the self-tests for BLOWFISH-CTR, tests IV increment of bulk CTR
+ encryption. Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+ const int nblocks = 4+1;
+ const int blocksize = BLOWFISH_BLOCKSIZE;
+ const int context_size = sizeof(BLOWFISH_context);
+
+ return _gcry_selftest_helper_ctr("BLOWFISH", &bf_setkey,
+ &encrypt_block, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for BLOWFISH-CBC, tests bulk CBC decryption.
+ Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+ const int nblocks = 4+2;
+ const int blocksize = BLOWFISH_BLOCKSIZE;
+ const int context_size = sizeof(BLOWFISH_context);
+
+ return _gcry_selftest_helper_cbc("BLOWFISH", &bf_setkey,
+ &encrypt_block, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for BLOWFISH-CFB, tests bulk CFB decryption.
+ Returns NULL on success. */
+static const char *
+selftest_cfb (void)
+{
+ const int nblocks = 4+2;
+ const int blocksize = BLOWFISH_BLOCKSIZE;
+ const int context_size = sizeof(BLOWFISH_context);
+
+ return _gcry_selftest_helper_cfb("BLOWFISH", &bf_setkey,
+ &encrypt_block, nblocks, blocksize, context_size);
+}
+
+
+static const char*
+selftest(void)
+{
+ BLOWFISH_context c;
+ cipher_bulk_ops_t bulk_ops;
+ byte plain[] = "BLOWFISH";
+ byte buffer[8];
+ static const byte plain3[] =
+ { 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10 };
+ static const byte key3[] =
+ { 0x41, 0x79, 0x6E, 0xA0, 0x52, 0x61, 0x6E, 0xE4 };
+ static const byte cipher3[] =
+ { 0xE1, 0x13, 0xF4, 0x10, 0x2C, 0xFC, 0xCE, 0x43 };
+ const char *r;
+
+ bf_setkey( (void *) &c,
+ (const unsigned char*)"abcdefghijklmnopqrstuvwxyz", 26,
+ &bulk_ops );
+ encrypt_block( (void *) &c, buffer, plain );
+ if( memcmp( buffer, "\x32\x4E\xD0\xFE\xF4\x13\xA2\x03", 8 ) )
+ return "Blowfish selftest failed (1).";
+ decrypt_block( (void *) &c, buffer, buffer );
+ if( memcmp( buffer, plain, 8 ) )
+ return "Blowfish selftest failed (2).";
+
+ bf_setkey( (void *) &c, key3, 8, &bulk_ops );
+ encrypt_block( (void *) &c, buffer, plain3 );
+ if( memcmp( buffer, cipher3, 8 ) )
+ return "Blowfish selftest failed (3).";
+ decrypt_block( (void *) &c, buffer, buffer );
+ if( memcmp( buffer, plain3, 8 ) )
+ return "Blowfish selftest failed (4).";
+
+ if ( (r = selftest_cbc ()) )
+ return r;
+
+ if ( (r = selftest_cfb ()) )
+ return r;
+
+ if ( (r = selftest_ctr ()) )
+ return r;
+
+ return NULL;
+}
+
+
+struct hashset_elem {
+ u32 val;
+ short nidx;
+ char used;
+};
+
+static inline byte
+val_to_hidx(u32 val)
+{
+ /* bf sboxes are quite random already. */
+ return (val >> 24) ^ (val >> 16) ^ (val >> 8) ^ val;
+}
+
+static inline int
+add_val(struct hashset_elem hset[256], u32 val, int *midx,
+ struct hashset_elem *mpool)
+{
+ struct hashset_elem *elem;
+ byte hidx;
+
+ hidx = val_to_hidx(val);
+ elem = &hset[hidx];
+
+ /* Check if first is in use. */
+ if (elem->used == 0)
+ {
+ elem->val = val;
+ elem->nidx = -1;
+ elem->used = 1;
+ return 0;
+ }
+
+ /* Check if first matches. */
+ if (elem->val == val)
+ return 1;
+
+ for (; elem->nidx >= 0; elem = &mpool[elem->nidx])
+ {
+ /* Check if elem matches. */
+ if (elem->val == val)
+ return 1;
+ }
+
+ elem->nidx = (*midx)++;
+ elem = &mpool[elem->nidx];
+
+ elem->val = val;
+ elem->nidx = -1;
+ elem->used = 1;
+
+ return 0;
+}
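+
+/* The weak-key scan uses a small chained hash set per S-box:
+   val_to_hidx() folds a 32-bit S-box entry to an 8-bit bucket index by
+   XORing its four bytes, and add_val() walks that bucket's chain,
+   returning 1 if the value was already present.  A duplicate entry within
+   one S-box marks the key as weak. */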
+
+static gcry_err_code_t
+do_bf_setkey (BLOWFISH_context *c, const byte *key, unsigned keylen)
+{
+ struct hashset_elem mempool[4 * 255]; /* Enough entries for the worst case. */
+ struct hashset_elem hset[4][256];
+ int memidx = 0;
+ int weak = 0;
+ int i, j, ret;
+ u32 data, datal, datar;
+ static int initialized;
+ static const char *selftest_failed;
+
+ if( !initialized )
+ {
+ initialized = 1;
+ selftest_failed = selftest();
+ if( selftest_failed )
+ log_error ("%s\n", selftest_failed );
+ }
+ if( selftest_failed )
+ return GPG_ERR_SELFTEST_FAILED;
+
+ if (keylen < BLOWFISH_KEY_MIN_BITS / 8 ||
+ keylen > BLOWFISH_KEY_MAX_BITS / 8)
+ return GPG_ERR_INV_KEYLEN;
+
+ memset(hset, 0, sizeof(hset));
+
+ for(i=0; i < 16+2; i++ )
+ c->p[i] = ps[i];
+ for(i=0; i < 256; i++ )
+ {
+ c->s0[i] = ks0[i];
+ c->s1[i] = ks1[i];
+ c->s2[i] = ks2[i];
+ c->s3[i] = ks3[i];
+ }
+
+ for(i=j=0; i < 16+2; i++ )
+ {
+ data = ((u32)key[j] << 24) |
+ ((u32)key[(j+1)%keylen] << 16) |
+ ((u32)key[(j+2)%keylen] << 8) |
+ ((u32)key[(j+3)%keylen]);
+ c->p[i] ^= data;
+ j = (j+4) % keylen;
+ }
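+
+  /* The key bytes (repeated cyclically) have now been XORed into P.  Next,
+     an all-zero block is encrypted repeatedly, each output feeding back in
+     as the next input, and the outputs successively replace P and then all
+     four S-boxes, while the hash sets watch for duplicate entries. */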
+
+ datal = datar = 0;
+ for(i=0; i < 16+2; i += 2 )
+ {
+ do_encrypt( c, &datal, &datar );
+ c->p[i] = datal;
+ c->p[i+1] = datar;
+ }
+ for(i=0; i < 256; i += 2 )
+ {
+ do_encrypt( c, &datal, &datar );
+ c->s0[i] = datal;
+ c->s0[i+1] = datar;
+
+ /* Add values to hashset, detect duplicates (weak keys). */
+ ret = add_val (hset[0], datal, &memidx, mempool);
+ weak = ret ? 1 : weak;
+ ret = add_val (hset[0], datar, &memidx, mempool);
+ weak = ret ? 1 : weak;
+ }
+ for(i=0; i < 256; i += 2 )
+ {
+ do_encrypt( c, &datal, &datar );
+ c->s1[i] = datal;
+ c->s1[i+1] = datar;
+
+ /* Add values to hashset, detect duplicates (weak keys). */
+ ret = add_val (hset[1], datal, &memidx, mempool);
+ weak = ret ? 1 : weak;
+ ret = add_val (hset[1], datar, &memidx, mempool);
+ weak = ret ? 1 : weak;
+ }
+ for(i=0; i < 256; i += 2 )
+ {
+ do_encrypt( c, &datal, &datar );
+ c->s2[i] = datal;
+ c->s2[i+1] = datar;
+
+ /* Add values to hashset, detect duplicates (weak keys). */
+ ret = add_val (hset[2], datal, &memidx, mempool);
+ weak = ret ? 1 : weak;
+ ret = add_val (hset[2], datar, &memidx, mempool);
+ weak = ret ? 1 : weak;
+ }
+ for(i=0; i < 256; i += 2 )
+ {
+ do_encrypt( c, &datal, &datar );
+ c->s3[i] = datal;
+ c->s3[i+1] = datar;
+
+ /* Add values to hashset, detect duplicates (weak keys). */
+ ret = add_val (hset[3], datal, &memidx, mempool);
+ weak = ret ? 1 : weak;
+ ret = add_val (hset[3], datar, &memidx, mempool);
+ weak = ret ? 1 : weak;
+ }
+
+ /* Clear stack. */
+ wipememory(hset, sizeof(hset));
+ wipememory(mempool, sizeof(mempool[0]) * memidx);
+
+ _gcry_burn_stack (64);
+
+  /* Check for weak key.  A weak key is one for which the key schedule
+     produces a duplicate value within one of the S-boxes; such duplicates
+     were detected above with the per-table hash sets.  */
+ if (weak)
+ return GPG_ERR_WEAK_KEY;
+
+ return GPG_ERR_NO_ERROR;
+}
+
+
+static gcry_err_code_t
+bf_setkey (void *context, const byte *key, unsigned keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ BLOWFISH_context *c = (BLOWFISH_context *) context;
+ gcry_err_code_t rc = do_bf_setkey (c, key, keylen);
+
+ /* Setup bulk encryption routines. */
+ memset (bulk_ops, 0, sizeof(*bulk_ops));
+ bulk_ops->cfb_dec = _gcry_blowfish_cfb_dec;
+ bulk_ops->cbc_dec = _gcry_blowfish_cbc_dec;
+ bulk_ops->ctr_enc = _gcry_blowfish_ctr_enc;
+
+ return rc;
+}
+
+
+gcry_cipher_spec_t _gcry_cipher_spec_blowfish =
+ {
+ GCRY_CIPHER_BLOWFISH, {0, 0},
+ "BLOWFISH", NULL, NULL, BLOWFISH_BLOCKSIZE, 128,
+ sizeof (BLOWFISH_context),
+ bf_setkey, encrypt_block, decrypt_block
+ };
diff --git a/comm/third_party/libgcrypt/cipher/bufhelp.h b/comm/third_party/libgcrypt/cipher/bufhelp.h
new file mode 100644
index 0000000000..fa5b2e8ece
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/bufhelp.h
@@ -0,0 +1,385 @@
+/* bufhelp.h - Some buffer manipulation helpers
+ * Copyright (C) 2012-2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef GCRYPT_BUFHELP_H
+#define GCRYPT_BUFHELP_H
+
+
+#include "g10lib.h"
+#include "bithelp.h"
+
+
+#undef BUFHELP_UNALIGNED_ACCESS
+#if defined(HAVE_GCC_ATTRIBUTE_PACKED) && \
+ defined(HAVE_GCC_ATTRIBUTE_ALIGNED) && \
+ defined(HAVE_GCC_ATTRIBUTE_MAY_ALIAS)
+/* Compiler supports the attributes needed for automatically issuing unaligned
+ memory access instructions.
+ */
+# define BUFHELP_UNALIGNED_ACCESS 1
+#endif
+
+
+#ifndef BUFHELP_UNALIGNED_ACCESS
+
+/* Functions for loading and storing unaligned u32 values of different
+ endianness. */
+static inline u32 buf_get_be32(const void *_buf)
+{
+ const byte *in = _buf;
+ return ((u32)in[0] << 24) | ((u32)in[1] << 16) | \
+ ((u32)in[2] << 8) | (u32)in[3];
+}
+
+static inline u32 buf_get_le32(const void *_buf)
+{
+ const byte *in = _buf;
+ return ((u32)in[3] << 24) | ((u32)in[2] << 16) | \
+ ((u32)in[1] << 8) | (u32)in[0];
+}
+
+static inline void buf_put_be32(void *_buf, u32 val)
+{
+ byte *out = _buf;
+ out[0] = val >> 24;
+ out[1] = val >> 16;
+ out[2] = val >> 8;
+ out[3] = val;
+}
+
+static inline void buf_put_le32(void *_buf, u32 val)
+{
+ byte *out = _buf;
+ out[3] = val >> 24;
+ out[2] = val >> 16;
+ out[1] = val >> 8;
+ out[0] = val;
+}
+
+
+/* Functions for loading and storing unaligned u64 values of different
+ endianness. */
+static inline u64 buf_get_be64(const void *_buf)
+{
+ const byte *in = _buf;
+ return ((u64)in[0] << 56) | ((u64)in[1] << 48) | \
+ ((u64)in[2] << 40) | ((u64)in[3] << 32) | \
+ ((u64)in[4] << 24) | ((u64)in[5] << 16) | \
+ ((u64)in[6] << 8) | (u64)in[7];
+}
+
+static inline u64 buf_get_le64(const void *_buf)
+{
+ const byte *in = _buf;
+ return ((u64)in[7] << 56) | ((u64)in[6] << 48) | \
+ ((u64)in[5] << 40) | ((u64)in[4] << 32) | \
+ ((u64)in[3] << 24) | ((u64)in[2] << 16) | \
+ ((u64)in[1] << 8) | (u64)in[0];
+}
+
+static inline void buf_put_be64(void *_buf, u64 val)
+{
+ byte *out = _buf;
+ out[0] = val >> 56;
+ out[1] = val >> 48;
+ out[2] = val >> 40;
+ out[3] = val >> 32;
+ out[4] = val >> 24;
+ out[5] = val >> 16;
+ out[6] = val >> 8;
+ out[7] = val;
+}
+
+static inline void buf_put_le64(void *_buf, u64 val)
+{
+ byte *out = _buf;
+ out[7] = val >> 56;
+ out[6] = val >> 48;
+ out[5] = val >> 40;
+ out[4] = val >> 32;
+ out[3] = val >> 24;
+ out[2] = val >> 16;
+ out[1] = val >> 8;
+ out[0] = val;
+}
+
+#else /*BUFHELP_UNALIGNED_ACCESS*/
+
+typedef struct bufhelp_u32_s
+{
+ u32 a;
+} __attribute__((packed, aligned(1), may_alias)) bufhelp_u32_t;
+
+/* Functions for loading and storing unaligned u32 values of different
+ endianness. */
+static inline u32 buf_get_be32(const void *_buf)
+{
+ return be_bswap32(((const bufhelp_u32_t *)_buf)->a);
+}
+
+static inline u32 buf_get_le32(const void *_buf)
+{
+ return le_bswap32(((const bufhelp_u32_t *)_buf)->a);
+}
+
+static inline void buf_put_be32(void *_buf, u32 val)
+{
+ bufhelp_u32_t *out = _buf;
+ out->a = be_bswap32(val);
+}
+
+static inline void buf_put_le32(void *_buf, u32 val)
+{
+ bufhelp_u32_t *out = _buf;
+ out->a = le_bswap32(val);
+}
+
+
+typedef struct bufhelp_u64_s
+{
+ u64 a;
+} __attribute__((packed, aligned(1), may_alias)) bufhelp_u64_t;
+
+/* Functions for loading and storing unaligned u64 values of different
+ endianness. */
+static inline u64 buf_get_be64(const void *_buf)
+{
+ return be_bswap64(((const bufhelp_u64_t *)_buf)->a);
+}
+
+static inline u64 buf_get_le64(const void *_buf)
+{
+ return le_bswap64(((const bufhelp_u64_t *)_buf)->a);
+}
+
+static inline void buf_put_be64(void *_buf, u64 val)
+{
+ bufhelp_u64_t *out = _buf;
+ out->a = be_bswap64(val);
+}
+
+static inline void buf_put_le64(void *_buf, u64 val)
+{
+ bufhelp_u64_t *out = _buf;
+ out->a = le_bswap64(val);
+}
+
+#endif /*BUFHELP_UNALIGNED_ACCESS*/
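+
+/* Both variants above give the same byte-order guarantees regardless of
+   host endianness or pointer alignment: for example, buf_get_be32() on the
+   bytes {0x01,0x02,0x03,0x04} yields 0x01020304, and buf_put_le64() stores
+   a u64 least-significant byte first.  In the second variant the
+   packed/aligned(1)/may_alias structs let the compiler emit unaligned
+   loads and stores directly on targets where that is safe. */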
+
+
+/* Host-endian get/put macros */
+#ifdef WORDS_BIGENDIAN
+# define buf_get_he32 buf_get_be32
+# define buf_put_he32 buf_put_be32
+# define buf_get_he64 buf_get_be64
+# define buf_put_he64 buf_put_be64
+#else
+# define buf_get_he32 buf_get_le32
+# define buf_put_he32 buf_put_le32
+# define buf_get_he64 buf_get_le64
+# define buf_put_he64 buf_put_le64
+#endif
+
+
+
+/* Optimized function for small buffer copying */
+static inline void
+buf_cpy(void *_dst, const void *_src, size_t len)
+{
+ byte *dst = _dst;
+ const byte *src = _src;
+
+#if __GNUC__ >= 4
+ if (!__builtin_constant_p (len))
+ {
+ if (UNLIKELY(len == 0))
+ return;
+ memcpy(_dst, _src, len);
+ return;
+ }
+#endif
+
+ while (len >= sizeof(u64))
+ {
+ buf_put_he64(dst, buf_get_he64(src));
+ dst += sizeof(u64);
+ src += sizeof(u64);
+ len -= sizeof(u64);
+ }
+
+ if (len >= sizeof(u32))
+ {
+ buf_put_he32(dst, buf_get_he32(src));
+ dst += sizeof(u32);
+ src += sizeof(u32);
+ len -= sizeof(u32);
+ }
+
+ /* Handle tail. */
+ for (; len; len--)
+ *dst++ = *src++;
+}
+
+
+/* Optimized function for buffer xoring */
+static inline void
+buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len)
+{
+ byte *dst = _dst;
+ const byte *src1 = _src1;
+ const byte *src2 = _src2;
+
+ while (len >= sizeof(u64))
+ {
+ buf_put_he64(dst, buf_get_he64(src1) ^ buf_get_he64(src2));
+ dst += sizeof(u64);
+ src1 += sizeof(u64);
+ src2 += sizeof(u64);
+ len -= sizeof(u64);
+ }
+
+ if (len > sizeof(u32))
+ {
+ buf_put_he32(dst, buf_get_he32(src1) ^ buf_get_he32(src2));
+ dst += sizeof(u32);
+ src1 += sizeof(u32);
+ src2 += sizeof(u32);
+ len -= sizeof(u32);
+ }
+
+ /* Handle tail. */
+ for (; len; len--)
+ *dst++ = *src1++ ^ *src2++;
+}
+
+
+/* Optimized function for buffer xoring with two destination buffers. Used
+ mainly by CFB mode encryption. */
+static inline void
+buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len)
+{
+ byte *dst1 = _dst1;
+ byte *dst2 = _dst2;
+ const byte *src = _src;
+
+ while (len >= sizeof(u64))
+ {
+ u64 temp = buf_get_he64(dst2) ^ buf_get_he64(src);
+ buf_put_he64(dst2, temp);
+ buf_put_he64(dst1, temp);
+ dst2 += sizeof(u64);
+ dst1 += sizeof(u64);
+ src += sizeof(u64);
+ len -= sizeof(u64);
+ }
+
+ if (len >= sizeof(u32))
+ {
+ u32 temp = buf_get_he32(dst2) ^ buf_get_he32(src);
+ buf_put_he32(dst2, temp);
+ buf_put_he32(dst1, temp);
+ dst2 += sizeof(u32);
+ dst1 += sizeof(u32);
+ src += sizeof(u32);
+ len -= sizeof(u32);
+ }
+
+ /* Handle tail. */
+ for (; len; len--)
+ *dst1++ = (*dst2++ ^= *src++);
+}
+
+
+/* Optimized function for combined buffer xoring and copying.  Used mainly by
+ CBC mode decryption. */
+static inline void
+buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
+ const void *_src_cpy, size_t len)
+{
+ byte *dst_xor = _dst_xor;
+ byte *srcdst_cpy = _srcdst_cpy;
+ const byte *src_xor = _src_xor;
+ const byte *src_cpy = _src_cpy;
+
+ while (len >= sizeof(u64))
+ {
+ u64 temp = buf_get_he64(src_cpy);
+ buf_put_he64(dst_xor, buf_get_he64(srcdst_cpy) ^ buf_get_he64(src_xor));
+ buf_put_he64(srcdst_cpy, temp);
+ dst_xor += sizeof(u64);
+ srcdst_cpy += sizeof(u64);
+ src_xor += sizeof(u64);
+ src_cpy += sizeof(u64);
+ len -= sizeof(u64);
+ }
+
+ if (len >= sizeof(u32))
+ {
+ u32 temp = buf_get_he32(src_cpy);
+ buf_put_he32(dst_xor, buf_get_he32(srcdst_cpy) ^ buf_get_he32(src_xor));
+ buf_put_he32(srcdst_cpy, temp);
+ dst_xor += sizeof(u32);
+ srcdst_cpy += sizeof(u32);
+ src_xor += sizeof(u32);
+ src_cpy += sizeof(u32);
+ len -= sizeof(u32);
+ }
+
+ /* Handle tail. */
+ for (; len; len--)
+ {
+ byte temp = *src_cpy++;
+ *dst_xor++ = *srcdst_cpy ^ *src_xor++;
+ *srcdst_cpy++ = temp;
+ }
+}
+
+
+/* Optimized function for combined buffer xoring and copying.  Used mainly by
+ CFB mode decryption. */
+static inline void
+buf_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src, size_t len)
+{
+ buf_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, len);
+}
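+
+/* buf_xor_n_copy() is buf_xor_n_copy_2() with the same buffer used as both
+   the XOR source and the copy source: dst_xor = srcdst_cpy ^ src, while
+   srcdst_cpy is overwritten with a saved copy of src.  This is exactly the
+   per-block step of CFB decryption. */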
+
+
+/* Constant-time compare of two buffers. Returns 1 if buffers are equal,
+ and 0 if buffers differ. */
+static inline int
+buf_eq_const(const void *_a, const void *_b, size_t len)
+{
+ const byte *a = _a;
+ const byte *b = _b;
+ int ab, ba;
+ size_t i;
+
+ /* Constant-time compare. */
+ for (i = 0, ab = 0, ba = 0; i < len; i++)
+ {
+ /* If a[i] != b[i], either ab or ba will be negative. */
+ ab |= a[i] - b[i];
+ ba |= b[i] - a[i];
+ }
+
+ /* 'ab | ba' is negative when buffers are not equal. */
+ return (ab | ba) >= 0;
+}
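+
+/* The comparison above runs in time that depends only on LEN: every byte
+   pair is visited, and only the sign bit of the accumulated differences
+   decides the result, so the position of a mismatch does not leak through
+   timing. */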
+
+
+#endif /*GCRYPT_BUFHELP_H*/
diff --git a/comm/third_party/libgcrypt/cipher/camellia-aarch64.S b/comm/third_party/libgcrypt/cipher/camellia-aarch64.S
new file mode 100644
index 0000000000..f498086212
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/camellia-aarch64.S
@@ -0,0 +1,586 @@
+/* camellia-aarch64.S - ARMv8/AArch64 assembly implementation of Camellia
+ * cipher
+ *
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__)
+#ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+
+.text
+
+/* struct camellia_ctx: */
+#define key_table 0
+
+/* register macros */
+#define CTX x0
+#define RDST x1
+#define RSRC x2
+#define RKEYBITS w3
+
+#define RTAB1 x4
+#define RTAB2 x5
+#define RTAB3 x6
+#define RTAB4 x7
+#define RMASK w8
+
+#define IL w9
+#define IR w10
+
+#define xIL x9
+#define xIR x10
+
+#define XL w11
+#define XR w12
+#define YL w13
+#define YR w14
+
+#define RT0 w15
+#define RT1 w16
+#define RT2 w17
+#define RT3 w19
+
+#define xRT0 x15
+#define xRT1 x16
+#define xRT2 x17
+#define xRT3 x19
+
+#ifdef __AARCH64EL__
+ #define host_to_be(reg, rtmp) \
+ rev reg, reg;
+ #define be_to_host(reg, rtmp) \
+ rev reg, reg;
+#else
+ /* nop on big-endian */
+ #define host_to_be(reg, rtmp) /*_*/
+ #define be_to_host(reg, rtmp) /*_*/
+#endif
+
+#define ldr_input_aligned_be(rin, a, b, c, d, rtmp) \
+ ldr a, [rin, #0]; \
+ ldr b, [rin, #4]; \
+ be_to_host(a, rtmp); \
+ ldr c, [rin, #8]; \
+ be_to_host(b, rtmp); \
+ ldr d, [rin, #12]; \
+ be_to_host(c, rtmp); \
+ be_to_host(d, rtmp);
+
+#define str_output_aligned_be(rout, a, b, c, d, rtmp) \
+ be_to_host(a, rtmp); \
+ be_to_host(b, rtmp); \
+ str a, [rout, #0]; \
+ be_to_host(c, rtmp); \
+ str b, [rout, #4]; \
+ be_to_host(d, rtmp); \
+ str c, [rout, #8]; \
+ str d, [rout, #12];
+
+/* unaligned word reads/writes allowed */
+#define ldr_input_be(rin, ra, rb, rc, rd, rtmp) \
+ ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp)
+
+#define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+ str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0)
+
+/**********************************************************************
+ 1-way camellia
+ **********************************************************************/
+#define roundsm(xl, xr, kl, kr, yl, yr) \
+ ldr RT2, [CTX, #(key_table + ((kl) * 4))]; \
+ and IR, RMASK, xr, lsl#(4); /*sp1110*/ \
+ ldr RT3, [CTX, #(key_table + ((kr) * 4))]; \
+ and IL, RMASK, xl, lsr#(24 - 4); /*sp1110*/ \
+ and RT0, RMASK, xr, lsr#(16 - 4); /*sp3033*/ \
+ ldr IR, [RTAB1, xIR]; \
+ and RT1, RMASK, xl, lsr#(8 - 4); /*sp3033*/ \
+ eor yl, yl, RT2; \
+ ldr IL, [RTAB1, xIL]; \
+ eor yr, yr, RT3; \
+ \
+ ldr RT0, [RTAB3, xRT0]; \
+ ldr RT1, [RTAB3, xRT1]; \
+ \
+ and RT2, RMASK, xr, lsr#(24 - 4); /*sp0222*/ \
+ and RT3, RMASK, xl, lsr#(16 - 4); /*sp0222*/ \
+ \
+ eor IR, IR, RT0; \
+ eor IL, IL, RT1; \
+ \
+ ldr RT2, [RTAB2, xRT2]; \
+ and RT0, RMASK, xr, lsr#(8 - 4); /*sp4404*/ \
+ ldr RT3, [RTAB2, xRT3]; \
+ and RT1, RMASK, xl, lsl#(4); /*sp4404*/ \
+ \
+ ldr RT0, [RTAB4, xRT0]; \
+ ldr RT1, [RTAB4, xRT1]; \
+ \
+ eor IR, IR, RT2; \
+ eor IL, IL, RT3; \
+ eor IR, IR, RT0; \
+ eor IL, IL, RT1; \
+ \
+ eor IR, IR, IL; \
+ eor yr, yr, IL, ror#8; \
+ eor yl, yl, IR; \
+ eor yr, yr, IR;
+
+#define enc_rounds(n) \
+ roundsm(XL, XR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, YL, YR); \
+ roundsm(YL, YR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, XL, XR); \
+ roundsm(XL, XR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, YL, YR); \
+ roundsm(YL, YR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, XL, XR); \
+ roundsm(XL, XR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, YL, YR); \
+ roundsm(YL, YR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, XL, XR);
+
+#define dec_rounds(n) \
+ roundsm(XL, XR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, YL, YR); \
+ roundsm(YL, YR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, XL, XR); \
+ roundsm(XL, XR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, YL, YR); \
+ roundsm(YL, YR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, XL, XR); \
+ roundsm(XL, XR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, YL, YR); \
+ roundsm(YL, YR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, XL, XR);
+
+/* perform FL and FL⁻¹ */
+#define fls(ll, lr, rl, rr, kll, klr, krl, krr) \
+ ldr RT0, [CTX, #(key_table + ((kll) * 4))]; \
+ ldr RT2, [CTX, #(key_table + ((krr) * 4))]; \
+ and RT0, RT0, ll; \
+ ldr RT3, [CTX, #(key_table + ((krl) * 4))]; \
+ orr RT2, RT2, rr; \
+ ldr RT1, [CTX, #(key_table + ((klr) * 4))]; \
+ eor rl, rl, RT2; \
+ eor lr, lr, RT0, ror#31; \
+ and RT3, RT3, rl; \
+ orr RT1, RT1, lr; \
+ eor ll, ll, RT1; \
+ eor rr, rr, RT3, ror#31;
+
+#define enc_fls(n) \
+ fls(XL, XR, YL, YR, \
+ (n) * 2 + 0, (n) * 2 + 1, \
+ (n) * 2 + 2, (n) * 2 + 3);
+
+#define dec_fls(n) \
+ fls(XL, XR, YL, YR, \
+ (n) * 2 + 2, (n) * 2 + 3, \
+ (n) * 2 + 0, (n) * 2 + 1);
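+
+/* For reference: the fls macro above computes the Camellia FL and FL⁻¹
+ * functions (RFC 3713) on 32-bit halves, with subkey halves (kl, kr):
+ *
+ *   FL:    xr ^= rol32(xl & kl, 1);  xl ^= (xr | kr);
+ *   FL⁻¹:  yl ^= (yr | kr);          yr ^= rol32(yl & kl, 1);
+ *
+ * The 'ror#31' in the macro is equivalent to a rotate left by one bit. */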
+
+#define inpack(n) \
+ ldr_input_be(RSRC, XL, XR, YL, YR, RT0); \
+ ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \
+ ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \
+ eor XL, XL, RT0; \
+ eor XR, XR, RT1;
+
+#define outunpack(n) \
+ ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \
+ ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \
+ eor YL, YL, RT0; \
+ eor YR, YR, RT1; \
+ str_output_be(RDST, YL, YR, XL, XR, RT0, RT1);
+
+.globl _gcry_camellia_arm_encrypt_block
+ELF(.type _gcry_camellia_arm_encrypt_block,@function;)
+
+_gcry_camellia_arm_encrypt_block:
+ CFI_STARTPROC()
+ stp x19, x30, [sp, #-16]!
+ CFI_ADJUST_CFA_OFFSET(16)
+ CFI_REG_ON_STACK(19, 0)
+ CFI_REG_ON_STACK(30, 8)
+
+ /* input:
+ * x0: keytable
+ * x1: dst
+ * x2: src
+ * w3: keybitlen
+ */
+
+ adr RTAB1, _gcry_camellia_arm_tables;
+ mov RMASK, #(0xff<<4); /* byte mask */
+ add RTAB2, RTAB1, #(1 * 4);
+ add RTAB3, RTAB1, #(2 * 4);
+ add RTAB4, RTAB1, #(3 * 4);
+
+ inpack(0);
+
+ enc_rounds(0);
+ enc_fls(8);
+ enc_rounds(8);
+ enc_fls(16);
+ enc_rounds(16);
+
+ cmp RKEYBITS, #(16 * 8);
+ bne .Lenc_256;
+
+ outunpack(24);
+
+ CFI_REMEMBER_STATE()
+ ldp x19, x30, [sp], #16
+ CFI_ADJUST_CFA_OFFSET(-16)
+ CFI_RESTORE(x19)
+ CFI_RESTORE(x30)
+ ret;
+ CFI_RESTORE_STATE()
+.ltorg
+
+.Lenc_256:
+ enc_fls(24);
+ enc_rounds(24);
+
+ outunpack(32);
+
+ ldp x19, x30, [sp], #16
+ CFI_ADJUST_CFA_OFFSET(-16)
+ CFI_RESTORE(x19)
+ CFI_RESTORE(x30)
+ ret;
+ CFI_ENDPROC()
+.ltorg
+ELF(.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;)
+
+.globl _gcry_camellia_arm_decrypt_block
+ELF(.type _gcry_camellia_arm_decrypt_block,@function;)
+
+_gcry_camellia_arm_decrypt_block:
+ CFI_STARTPROC()
+ stp x19, x30, [sp, #-16]!
+ CFI_ADJUST_CFA_OFFSET(16)
+ CFI_REG_ON_STACK(19, 0)
+ CFI_REG_ON_STACK(30, 8)
+
+ /* input:
+ * x0: keytable
+ * x1: dst
+ * x2: src
+ * w3: keybitlen
+ */
+
+ adr RTAB1, _gcry_camellia_arm_tables;
+ mov RMASK, #(0xff<<4); /* byte mask */
+ add RTAB2, RTAB1, #(1 * 4);
+ add RTAB3, RTAB1, #(2 * 4);
+ add RTAB4, RTAB1, #(3 * 4);
+
+ cmp RKEYBITS, #(16 * 8);
+ bne .Ldec_256;
+
+ inpack(24);
+
+.Ldec_128:
+ dec_rounds(16);
+ dec_fls(16);
+ dec_rounds(8);
+ dec_fls(8);
+ dec_rounds(0);
+
+ outunpack(0);
+
+ CFI_REMEMBER_STATE()
+ ldp x19, x30, [sp], #16
+ CFI_ADJUST_CFA_OFFSET(-16)
+ CFI_RESTORE(x19)
+ CFI_RESTORE(x30)
+ ret;
+ CFI_RESTORE_STATE()
+.ltorg
+
+.Ldec_256:
+ inpack(32);
+ dec_rounds(24);
+ dec_fls(24);
+
+ b .Ldec_128;
+ CFI_ENDPROC()
+.ltorg
+ELF(.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;)
+
+/* Encryption/Decryption tables */
+ELF(.type _gcry_camellia_arm_tables,@object;)
+.balign 32
+_gcry_camellia_arm_tables:
+.Lcamellia_sp1110:
+.long 0x70707000
+.Lcamellia_sp0222:
+ .long 0x00e0e0e0
+.Lcamellia_sp3033:
+ .long 0x38003838
+.Lcamellia_sp4404:
+ .long 0x70700070
+.long 0x82828200, 0x00050505, 0x41004141, 0x2c2c002c
+.long 0x2c2c2c00, 0x00585858, 0x16001616, 0xb3b300b3
+.long 0xececec00, 0x00d9d9d9, 0x76007676, 0xc0c000c0
+.long 0xb3b3b300, 0x00676767, 0xd900d9d9, 0xe4e400e4
+.long 0x27272700, 0x004e4e4e, 0x93009393, 0x57570057
+.long 0xc0c0c000, 0x00818181, 0x60006060, 0xeaea00ea
+.long 0xe5e5e500, 0x00cbcbcb, 0xf200f2f2, 0xaeae00ae
+.long 0xe4e4e400, 0x00c9c9c9, 0x72007272, 0x23230023
+.long 0x85858500, 0x000b0b0b, 0xc200c2c2, 0x6b6b006b
+.long 0x57575700, 0x00aeaeae, 0xab00abab, 0x45450045
+.long 0x35353500, 0x006a6a6a, 0x9a009a9a, 0xa5a500a5
+.long 0xeaeaea00, 0x00d5d5d5, 0x75007575, 0xeded00ed
+.long 0x0c0c0c00, 0x00181818, 0x06000606, 0x4f4f004f
+.long 0xaeaeae00, 0x005d5d5d, 0x57005757, 0x1d1d001d
+.long 0x41414100, 0x00828282, 0xa000a0a0, 0x92920092
+.long 0x23232300, 0x00464646, 0x91009191, 0x86860086
+.long 0xefefef00, 0x00dfdfdf, 0xf700f7f7, 0xafaf00af
+.long 0x6b6b6b00, 0x00d6d6d6, 0xb500b5b5, 0x7c7c007c
+.long 0x93939300, 0x00272727, 0xc900c9c9, 0x1f1f001f
+.long 0x45454500, 0x008a8a8a, 0xa200a2a2, 0x3e3e003e
+.long 0x19191900, 0x00323232, 0x8c008c8c, 0xdcdc00dc
+.long 0xa5a5a500, 0x004b4b4b, 0xd200d2d2, 0x5e5e005e
+.long 0x21212100, 0x00424242, 0x90009090, 0x0b0b000b
+.long 0xededed00, 0x00dbdbdb, 0xf600f6f6, 0xa6a600a6
+.long 0x0e0e0e00, 0x001c1c1c, 0x07000707, 0x39390039
+.long 0x4f4f4f00, 0x009e9e9e, 0xa700a7a7, 0xd5d500d5
+.long 0x4e4e4e00, 0x009c9c9c, 0x27002727, 0x5d5d005d
+.long 0x1d1d1d00, 0x003a3a3a, 0x8e008e8e, 0xd9d900d9
+.long 0x65656500, 0x00cacaca, 0xb200b2b2, 0x5a5a005a
+.long 0x92929200, 0x00252525, 0x49004949, 0x51510051
+.long 0xbdbdbd00, 0x007b7b7b, 0xde00dede, 0x6c6c006c
+.long 0x86868600, 0x000d0d0d, 0x43004343, 0x8b8b008b
+.long 0xb8b8b800, 0x00717171, 0x5c005c5c, 0x9a9a009a
+.long 0xafafaf00, 0x005f5f5f, 0xd700d7d7, 0xfbfb00fb
+.long 0x8f8f8f00, 0x001f1f1f, 0xc700c7c7, 0xb0b000b0
+.long 0x7c7c7c00, 0x00f8f8f8, 0x3e003e3e, 0x74740074
+.long 0xebebeb00, 0x00d7d7d7, 0xf500f5f5, 0x2b2b002b
+.long 0x1f1f1f00, 0x003e3e3e, 0x8f008f8f, 0xf0f000f0
+.long 0xcecece00, 0x009d9d9d, 0x67006767, 0x84840084
+.long 0x3e3e3e00, 0x007c7c7c, 0x1f001f1f, 0xdfdf00df
+.long 0x30303000, 0x00606060, 0x18001818, 0xcbcb00cb
+.long 0xdcdcdc00, 0x00b9b9b9, 0x6e006e6e, 0x34340034
+.long 0x5f5f5f00, 0x00bebebe, 0xaf00afaf, 0x76760076
+.long 0x5e5e5e00, 0x00bcbcbc, 0x2f002f2f, 0x6d6d006d
+.long 0xc5c5c500, 0x008b8b8b, 0xe200e2e2, 0xa9a900a9
+.long 0x0b0b0b00, 0x00161616, 0x85008585, 0xd1d100d1
+.long 0x1a1a1a00, 0x00343434, 0x0d000d0d, 0x04040004
+.long 0xa6a6a600, 0x004d4d4d, 0x53005353, 0x14140014
+.long 0xe1e1e100, 0x00c3c3c3, 0xf000f0f0, 0x3a3a003a
+.long 0x39393900, 0x00727272, 0x9c009c9c, 0xdede00de
+.long 0xcacaca00, 0x00959595, 0x65006565, 0x11110011
+.long 0xd5d5d500, 0x00ababab, 0xea00eaea, 0x32320032
+.long 0x47474700, 0x008e8e8e, 0xa300a3a3, 0x9c9c009c
+.long 0x5d5d5d00, 0x00bababa, 0xae00aeae, 0x53530053
+.long 0x3d3d3d00, 0x007a7a7a, 0x9e009e9e, 0xf2f200f2
+.long 0xd9d9d900, 0x00b3b3b3, 0xec00ecec, 0xfefe00fe
+.long 0x01010100, 0x00020202, 0x80008080, 0xcfcf00cf
+.long 0x5a5a5a00, 0x00b4b4b4, 0x2d002d2d, 0xc3c300c3
+.long 0xd6d6d600, 0x00adadad, 0x6b006b6b, 0x7a7a007a
+.long 0x51515100, 0x00a2a2a2, 0xa800a8a8, 0x24240024
+.long 0x56565600, 0x00acacac, 0x2b002b2b, 0xe8e800e8
+.long 0x6c6c6c00, 0x00d8d8d8, 0x36003636, 0x60600060
+.long 0x4d4d4d00, 0x009a9a9a, 0xa600a6a6, 0x69690069
+.long 0x8b8b8b00, 0x00171717, 0xc500c5c5, 0xaaaa00aa
+.long 0x0d0d0d00, 0x001a1a1a, 0x86008686, 0xa0a000a0
+.long 0x9a9a9a00, 0x00353535, 0x4d004d4d, 0xa1a100a1
+.long 0x66666600, 0x00cccccc, 0x33003333, 0x62620062
+.long 0xfbfbfb00, 0x00f7f7f7, 0xfd00fdfd, 0x54540054
+.long 0xcccccc00, 0x00999999, 0x66006666, 0x1e1e001e
+.long 0xb0b0b000, 0x00616161, 0x58005858, 0xe0e000e0
+.long 0x2d2d2d00, 0x005a5a5a, 0x96009696, 0x64640064
+.long 0x74747400, 0x00e8e8e8, 0x3a003a3a, 0x10100010
+.long 0x12121200, 0x00242424, 0x09000909, 0x00000000
+.long 0x2b2b2b00, 0x00565656, 0x95009595, 0xa3a300a3
+.long 0x20202000, 0x00404040, 0x10001010, 0x75750075
+.long 0xf0f0f000, 0x00e1e1e1, 0x78007878, 0x8a8a008a
+.long 0xb1b1b100, 0x00636363, 0xd800d8d8, 0xe6e600e6
+.long 0x84848400, 0x00090909, 0x42004242, 0x09090009
+.long 0x99999900, 0x00333333, 0xcc00cccc, 0xdddd00dd
+.long 0xdfdfdf00, 0x00bfbfbf, 0xef00efef, 0x87870087
+.long 0x4c4c4c00, 0x00989898, 0x26002626, 0x83830083
+.long 0xcbcbcb00, 0x00979797, 0xe500e5e5, 0xcdcd00cd
+.long 0xc2c2c200, 0x00858585, 0x61006161, 0x90900090
+.long 0x34343400, 0x00686868, 0x1a001a1a, 0x73730073
+.long 0x7e7e7e00, 0x00fcfcfc, 0x3f003f3f, 0xf6f600f6
+.long 0x76767600, 0x00ececec, 0x3b003b3b, 0x9d9d009d
+.long 0x05050500, 0x000a0a0a, 0x82008282, 0xbfbf00bf
+.long 0x6d6d6d00, 0x00dadada, 0xb600b6b6, 0x52520052
+.long 0xb7b7b700, 0x006f6f6f, 0xdb00dbdb, 0xd8d800d8
+.long 0xa9a9a900, 0x00535353, 0xd400d4d4, 0xc8c800c8
+.long 0x31313100, 0x00626262, 0x98009898, 0xc6c600c6
+.long 0xd1d1d100, 0x00a3a3a3, 0xe800e8e8, 0x81810081
+.long 0x17171700, 0x002e2e2e, 0x8b008b8b, 0x6f6f006f
+.long 0x04040400, 0x00080808, 0x02000202, 0x13130013
+.long 0xd7d7d700, 0x00afafaf, 0xeb00ebeb, 0x63630063
+.long 0x14141400, 0x00282828, 0x0a000a0a, 0xe9e900e9
+.long 0x58585800, 0x00b0b0b0, 0x2c002c2c, 0xa7a700a7
+.long 0x3a3a3a00, 0x00747474, 0x1d001d1d, 0x9f9f009f
+.long 0x61616100, 0x00c2c2c2, 0xb000b0b0, 0xbcbc00bc
+.long 0xdedede00, 0x00bdbdbd, 0x6f006f6f, 0x29290029
+.long 0x1b1b1b00, 0x00363636, 0x8d008d8d, 0xf9f900f9
+.long 0x11111100, 0x00222222, 0x88008888, 0x2f2f002f
+.long 0x1c1c1c00, 0x00383838, 0x0e000e0e, 0xb4b400b4
+.long 0x32323200, 0x00646464, 0x19001919, 0x78780078
+.long 0x0f0f0f00, 0x001e1e1e, 0x87008787, 0x06060006
+.long 0x9c9c9c00, 0x00393939, 0x4e004e4e, 0xe7e700e7
+.long 0x16161600, 0x002c2c2c, 0x0b000b0b, 0x71710071
+.long 0x53535300, 0x00a6a6a6, 0xa900a9a9, 0xd4d400d4
+.long 0x18181800, 0x00303030, 0x0c000c0c, 0xabab00ab
+.long 0xf2f2f200, 0x00e5e5e5, 0x79007979, 0x88880088
+.long 0x22222200, 0x00444444, 0x11001111, 0x8d8d008d
+.long 0xfefefe00, 0x00fdfdfd, 0x7f007f7f, 0x72720072
+.long 0x44444400, 0x00888888, 0x22002222, 0xb9b900b9
+.long 0xcfcfcf00, 0x009f9f9f, 0xe700e7e7, 0xf8f800f8
+.long 0xb2b2b200, 0x00656565, 0x59005959, 0xacac00ac
+.long 0xc3c3c300, 0x00878787, 0xe100e1e1, 0x36360036
+.long 0xb5b5b500, 0x006b6b6b, 0xda00dada, 0x2a2a002a
+.long 0x7a7a7a00, 0x00f4f4f4, 0x3d003d3d, 0x3c3c003c
+.long 0x91919100, 0x00232323, 0xc800c8c8, 0xf1f100f1
+.long 0x24242400, 0x00484848, 0x12001212, 0x40400040
+.long 0x08080800, 0x00101010, 0x04000404, 0xd3d300d3
+.long 0xe8e8e800, 0x00d1d1d1, 0x74007474, 0xbbbb00bb
+.long 0xa8a8a800, 0x00515151, 0x54005454, 0x43430043
+.long 0x60606000, 0x00c0c0c0, 0x30003030, 0x15150015
+.long 0xfcfcfc00, 0x00f9f9f9, 0x7e007e7e, 0xadad00ad
+.long 0x69696900, 0x00d2d2d2, 0xb400b4b4, 0x77770077
+.long 0x50505000, 0x00a0a0a0, 0x28002828, 0x80800080
+.long 0xaaaaaa00, 0x00555555, 0x55005555, 0x82820082
+.long 0xd0d0d000, 0x00a1a1a1, 0x68006868, 0xecec00ec
+.long 0xa0a0a000, 0x00414141, 0x50005050, 0x27270027
+.long 0x7d7d7d00, 0x00fafafa, 0xbe00bebe, 0xe5e500e5
+.long 0xa1a1a100, 0x00434343, 0xd000d0d0, 0x85850085
+.long 0x89898900, 0x00131313, 0xc400c4c4, 0x35350035
+.long 0x62626200, 0x00c4c4c4, 0x31003131, 0x0c0c000c
+.long 0x97979700, 0x002f2f2f, 0xcb00cbcb, 0x41410041
+.long 0x54545400, 0x00a8a8a8, 0x2a002a2a, 0xefef00ef
+.long 0x5b5b5b00, 0x00b6b6b6, 0xad00adad, 0x93930093
+.long 0x1e1e1e00, 0x003c3c3c, 0x0f000f0f, 0x19190019
+.long 0x95959500, 0x002b2b2b, 0xca00caca, 0x21210021
+.long 0xe0e0e000, 0x00c1c1c1, 0x70007070, 0x0e0e000e
+.long 0xffffff00, 0x00ffffff, 0xff00ffff, 0x4e4e004e
+.long 0x64646400, 0x00c8c8c8, 0x32003232, 0x65650065
+.long 0xd2d2d200, 0x00a5a5a5, 0x69006969, 0xbdbd00bd
+.long 0x10101000, 0x00202020, 0x08000808, 0xb8b800b8
+.long 0xc4c4c400, 0x00898989, 0x62006262, 0x8f8f008f
+.long 0x00000000, 0x00000000, 0x00000000, 0xebeb00eb
+.long 0x48484800, 0x00909090, 0x24002424, 0xcece00ce
+.long 0xa3a3a300, 0x00474747, 0xd100d1d1, 0x30300030
+.long 0xf7f7f700, 0x00efefef, 0xfb00fbfb, 0x5f5f005f
+.long 0x75757500, 0x00eaeaea, 0xba00baba, 0xc5c500c5
+.long 0xdbdbdb00, 0x00b7b7b7, 0xed00eded, 0x1a1a001a
+.long 0x8a8a8a00, 0x00151515, 0x45004545, 0xe1e100e1
+.long 0x03030300, 0x00060606, 0x81008181, 0xcaca00ca
+.long 0xe6e6e600, 0x00cdcdcd, 0x73007373, 0x47470047
+.long 0xdadada00, 0x00b5b5b5, 0x6d006d6d, 0x3d3d003d
+.long 0x09090900, 0x00121212, 0x84008484, 0x01010001
+.long 0x3f3f3f00, 0x007e7e7e, 0x9f009f9f, 0xd6d600d6
+.long 0xdddddd00, 0x00bbbbbb, 0xee00eeee, 0x56560056
+.long 0x94949400, 0x00292929, 0x4a004a4a, 0x4d4d004d
+.long 0x87878700, 0x000f0f0f, 0xc300c3c3, 0x0d0d000d
+.long 0x5c5c5c00, 0x00b8b8b8, 0x2e002e2e, 0x66660066
+.long 0x83838300, 0x00070707, 0xc100c1c1, 0xcccc00cc
+.long 0x02020200, 0x00040404, 0x01000101, 0x2d2d002d
+.long 0xcdcdcd00, 0x009b9b9b, 0xe600e6e6, 0x12120012
+.long 0x4a4a4a00, 0x00949494, 0x25002525, 0x20200020
+.long 0x90909000, 0x00212121, 0x48004848, 0xb1b100b1
+.long 0x33333300, 0x00666666, 0x99009999, 0x99990099
+.long 0x73737300, 0x00e6e6e6, 0xb900b9b9, 0x4c4c004c
+.long 0x67676700, 0x00cecece, 0xb300b3b3, 0xc2c200c2
+.long 0xf6f6f600, 0x00ededed, 0x7b007b7b, 0x7e7e007e
+.long 0xf3f3f300, 0x00e7e7e7, 0xf900f9f9, 0x05050005
+.long 0x9d9d9d00, 0x003b3b3b, 0xce00cece, 0xb7b700b7
+.long 0x7f7f7f00, 0x00fefefe, 0xbf00bfbf, 0x31310031
+.long 0xbfbfbf00, 0x007f7f7f, 0xdf00dfdf, 0x17170017
+.long 0xe2e2e200, 0x00c5c5c5, 0x71007171, 0xd7d700d7
+.long 0x52525200, 0x00a4a4a4, 0x29002929, 0x58580058
+.long 0x9b9b9b00, 0x00373737, 0xcd00cdcd, 0x61610061
+.long 0xd8d8d800, 0x00b1b1b1, 0x6c006c6c, 0x1b1b001b
+.long 0x26262600, 0x004c4c4c, 0x13001313, 0x1c1c001c
+.long 0xc8c8c800, 0x00919191, 0x64006464, 0x0f0f000f
+.long 0x37373700, 0x006e6e6e, 0x9b009b9b, 0x16160016
+.long 0xc6c6c600, 0x008d8d8d, 0x63006363, 0x18180018
+.long 0x3b3b3b00, 0x00767676, 0x9d009d9d, 0x22220022
+.long 0x81818100, 0x00030303, 0xc000c0c0, 0x44440044
+.long 0x96969600, 0x002d2d2d, 0x4b004b4b, 0xb2b200b2
+.long 0x6f6f6f00, 0x00dedede, 0xb700b7b7, 0xb5b500b5
+.long 0x4b4b4b00, 0x00969696, 0xa500a5a5, 0x91910091
+.long 0x13131300, 0x00262626, 0x89008989, 0x08080008
+.long 0xbebebe00, 0x007d7d7d, 0x5f005f5f, 0xa8a800a8
+.long 0x63636300, 0x00c6c6c6, 0xb100b1b1, 0xfcfc00fc
+.long 0x2e2e2e00, 0x005c5c5c, 0x17001717, 0x50500050
+.long 0xe9e9e900, 0x00d3d3d3, 0xf400f4f4, 0xd0d000d0
+.long 0x79797900, 0x00f2f2f2, 0xbc00bcbc, 0x7d7d007d
+.long 0xa7a7a700, 0x004f4f4f, 0xd300d3d3, 0x89890089
+.long 0x8c8c8c00, 0x00191919, 0x46004646, 0x97970097
+.long 0x9f9f9f00, 0x003f3f3f, 0xcf00cfcf, 0x5b5b005b
+.long 0x6e6e6e00, 0x00dcdcdc, 0x37003737, 0x95950095
+.long 0xbcbcbc00, 0x00797979, 0x5e005e5e, 0xffff00ff
+.long 0x8e8e8e00, 0x001d1d1d, 0x47004747, 0xd2d200d2
+.long 0x29292900, 0x00525252, 0x94009494, 0xc4c400c4
+.long 0xf5f5f500, 0x00ebebeb, 0xfa00fafa, 0x48480048
+.long 0xf9f9f900, 0x00f3f3f3, 0xfc00fcfc, 0xf7f700f7
+.long 0xb6b6b600, 0x006d6d6d, 0x5b005b5b, 0xdbdb00db
+.long 0x2f2f2f00, 0x005e5e5e, 0x97009797, 0x03030003
+.long 0xfdfdfd00, 0x00fbfbfb, 0xfe00fefe, 0xdada00da
+.long 0xb4b4b400, 0x00696969, 0x5a005a5a, 0x3f3f003f
+.long 0x59595900, 0x00b2b2b2, 0xac00acac, 0x94940094
+.long 0x78787800, 0x00f0f0f0, 0x3c003c3c, 0x5c5c005c
+.long 0x98989800, 0x00313131, 0x4c004c4c, 0x02020002
+.long 0x06060600, 0x000c0c0c, 0x03000303, 0x4a4a004a
+.long 0x6a6a6a00, 0x00d4d4d4, 0x35003535, 0x33330033
+.long 0xe7e7e700, 0x00cfcfcf, 0xf300f3f3, 0x67670067
+.long 0x46464600, 0x008c8c8c, 0x23002323, 0xf3f300f3
+.long 0x71717100, 0x00e2e2e2, 0xb800b8b8, 0x7f7f007f
+.long 0xbababa00, 0x00757575, 0x5d005d5d, 0xe2e200e2
+.long 0xd4d4d400, 0x00a9a9a9, 0x6a006a6a, 0x9b9b009b
+.long 0x25252500, 0x004a4a4a, 0x92009292, 0x26260026
+.long 0xababab00, 0x00575757, 0xd500d5d5, 0x37370037
+.long 0x42424200, 0x00848484, 0x21002121, 0x3b3b003b
+.long 0x88888800, 0x00111111, 0x44004444, 0x96960096
+.long 0xa2a2a200, 0x00454545, 0x51005151, 0x4b4b004b
+.long 0x8d8d8d00, 0x001b1b1b, 0xc600c6c6, 0xbebe00be
+.long 0xfafafa00, 0x00f5f5f5, 0x7d007d7d, 0x2e2e002e
+.long 0x72727200, 0x00e4e4e4, 0x39003939, 0x79790079
+.long 0x07070700, 0x000e0e0e, 0x83008383, 0x8c8c008c
+.long 0xb9b9b900, 0x00737373, 0xdc00dcdc, 0x6e6e006e
+.long 0x55555500, 0x00aaaaaa, 0xaa00aaaa, 0x8e8e008e
+.long 0xf8f8f800, 0x00f1f1f1, 0x7c007c7c, 0xf5f500f5
+.long 0xeeeeee00, 0x00dddddd, 0x77007777, 0xb6b600b6
+.long 0xacacac00, 0x00595959, 0x56005656, 0xfdfd00fd
+.long 0x0a0a0a00, 0x00141414, 0x05000505, 0x59590059
+.long 0x36363600, 0x006c6c6c, 0x1b001b1b, 0x98980098
+.long 0x49494900, 0x00929292, 0xa400a4a4, 0x6a6a006a
+.long 0x2a2a2a00, 0x00545454, 0x15001515, 0x46460046
+.long 0x68686800, 0x00d0d0d0, 0x34003434, 0xbaba00ba
+.long 0x3c3c3c00, 0x00787878, 0x1e001e1e, 0x25250025
+.long 0x38383800, 0x00707070, 0x1c001c1c, 0x42420042
+.long 0xf1f1f100, 0x00e3e3e3, 0xf800f8f8, 0xa2a200a2
+.long 0xa4a4a400, 0x00494949, 0x52005252, 0xfafa00fa
+.long 0x40404000, 0x00808080, 0x20002020, 0x07070007
+.long 0x28282800, 0x00505050, 0x14001414, 0x55550055
+.long 0xd3d3d300, 0x00a7a7a7, 0xe900e9e9, 0xeeee00ee
+.long 0x7b7b7b00, 0x00f6f6f6, 0xbd00bdbd, 0x0a0a000a
+.long 0xbbbbbb00, 0x00777777, 0xdd00dddd, 0x49490049
+.long 0xc9c9c900, 0x00939393, 0xe400e4e4, 0x68680068
+.long 0x43434300, 0x00868686, 0xa100a1a1, 0x38380038
+.long 0xc1c1c100, 0x00838383, 0xe000e0e0, 0xa4a400a4
+.long 0x15151500, 0x002a2a2a, 0x8a008a8a, 0x28280028
+.long 0xe3e3e300, 0x00c7c7c7, 0xf100f1f1, 0x7b7b007b
+.long 0xadadad00, 0x005b5b5b, 0xd600d6d6, 0xc9c900c9
+.long 0xf4f4f400, 0x00e9e9e9, 0x7a007a7a, 0xc1c100c1
+.long 0x77777700, 0x00eeeeee, 0xbb00bbbb, 0xe3e300e3
+.long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4
+.long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7
+.long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e
+ELF(.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables;)
+
+#endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/
+#endif /*__AARCH64EL__*/
diff --git a/comm/third_party/libgcrypt/cipher/camellia-aesni-avx-amd64.S b/comm/third_party/libgcrypt/cipher/camellia-aesni-avx-amd64.S
new file mode 100644
index 0000000000..64cabaa51b
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/camellia-aesni-avx-amd64.S
@@ -0,0 +1,2618 @@
+/* camellia-avx-aesni-amd64.S - AES-NI/AVX implementation of Camellia cipher
+ *
+ * Copyright (C) 2013-2015,2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#ifdef __x86_64
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
+
+#include "asm-common-amd64.h"
+
+#define CAMELLIA_TABLE_BYTE_LEN 272
+
+/* struct CAMELLIA_context: */
+#define key_table 0
+#define key_bitlength CAMELLIA_TABLE_BYTE_LEN
+
+/* register macros */
+#define CTX %rdi
+
+/**********************************************************************
+ helper macros
+ **********************************************************************/
+#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpand x, mask4bit, tmp0; \
+ vpandn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
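+
+/* For reference: filter_8bit() computes, for every byte x of the input
+ * vector, lo_t[x & 0x0f] ^ hi_t[x >> 4], i.e. an 8-bit table lookup split
+ * into two 16-entry nibble lookups so that it can be done with vpshufb. */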
+
+/**********************************************************************
+ 16-way camellia
+ **********************************************************************/
+
+/*
+ * IN:
+ * x0..x7: byte-sliced AB state
+ * mem_cd: register pointer storing CD state
+ * key: index for key material
+ * OUT:
+ * x0..x7: new byte-sliced CD state
+ */
+#define roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \
+ t7, mem_cd, key) \
+ /* \
+ * S-function with AES subbytes \
+ */ \
+ vmovdqa .Linv_shift_row rRIP, t4; \
+ vbroadcastss .L0f0f0f0f rRIP, t7; \
+ vmovdqa .Lpre_tf_lo_s1 rRIP, t0; \
+ vmovdqa .Lpre_tf_hi_s1 rRIP, t1; \
+ \
+ /* AES inverse shift rows */ \
+ vpshufb t4, x0, x0; \
+ vpshufb t4, x7, x7; \
+ vpshufb t4, x1, x1; \
+ vpshufb t4, x4, x4; \
+ vpshufb t4, x2, x2; \
+ vpshufb t4, x5, x5; \
+ vpshufb t4, x3, x3; \
+ vpshufb t4, x6, x6; \
+ \
+ /* prefilter sboxes 1, 2 and 3 */ \
+ vmovdqa .Lpre_tf_lo_s4 rRIP, t2; \
+ vmovdqa .Lpre_tf_hi_s4 rRIP, t3; \
+ filter_8bit(x0, t0, t1, t7, t6); \
+ filter_8bit(x7, t0, t1, t7, t6); \
+ filter_8bit(x1, t0, t1, t7, t6); \
+ filter_8bit(x4, t0, t1, t7, t6); \
+ filter_8bit(x2, t0, t1, t7, t6); \
+ filter_8bit(x5, t0, t1, t7, t6); \
+ \
+ /* prefilter sbox 4 */ \
+ vpxor t4, t4, t4; \
+ filter_8bit(x3, t2, t3, t7, t6); \
+ filter_8bit(x6, t2, t3, t7, t6); \
+ \
+ /* AES subbytes + AES shift rows */ \
+ vmovdqa .Lpost_tf_lo_s1 rRIP, t0; \
+ vmovdqa .Lpost_tf_hi_s1 rRIP, t1; \
+ vaesenclast t4, x0, x0; \
+ vaesenclast t4, x7, x7; \
+ vaesenclast t4, x1, x1; \
+ vaesenclast t4, x4, x4; \
+ vaesenclast t4, x2, x2; \
+ vaesenclast t4, x5, x5; \
+ vaesenclast t4, x3, x3; \
+ vaesenclast t4, x6, x6; \
+ \
+ /* postfilter sboxes 1 and 4 */ \
+ vmovdqa .Lpost_tf_lo_s3 rRIP, t2; \
+ vmovdqa .Lpost_tf_hi_s3 rRIP, t3; \
+ filter_8bit(x0, t0, t1, t7, t6); \
+ filter_8bit(x7, t0, t1, t7, t6); \
+ filter_8bit(x3, t0, t1, t7, t6); \
+ filter_8bit(x6, t0, t1, t7, t6); \
+ \
+ /* postfilter sbox 3 */ \
+ vmovdqa .Lpost_tf_lo_s2 rRIP, t4; \
+ vmovdqa .Lpost_tf_hi_s2 rRIP, t5; \
+ filter_8bit(x2, t2, t3, t7, t6); \
+ filter_8bit(x5, t2, t3, t7, t6); \
+ \
+ vpxor t6, t6, t6; \
+ vmovq key, t0; \
+ \
+ /* postfilter sbox 2 */ \
+ filter_8bit(x1, t4, t5, t7, t2); \
+ filter_8bit(x4, t4, t5, t7, t2); \
+ \
+ vpsrldq $5, t0, t5; \
+ vpsrldq $1, t0, t1; \
+ vpsrldq $2, t0, t2; \
+ vpsrldq $3, t0, t3; \
+ vpsrldq $4, t0, t4; \
+ vpshufb t6, t0, t0; \
+ vpshufb t6, t1, t1; \
+ vpshufb t6, t2, t2; \
+ vpshufb t6, t3, t3; \
+ vpshufb t6, t4, t4; \
+ vpsrldq $2, t5, t7; \
+ vpshufb t6, t7, t7; \
+ \
+ /* P-function */ \
+ vpxor x5, x0, x0; \
+ vpxor x6, x1, x1; \
+ vpxor x7, x2, x2; \
+ vpxor x4, x3, x3; \
+ \
+ vpxor x2, x4, x4; \
+ vpxor x3, x5, x5; \
+ vpxor x0, x6, x6; \
+ vpxor x1, x7, x7; \
+ \
+ vpxor x7, x0, x0; \
+ vpxor x4, x1, x1; \
+ vpxor x5, x2, x2; \
+ vpxor x6, x3, x3; \
+ \
+ vpxor x3, x4, x4; \
+ vpxor x0, x5, x5; \
+ vpxor x1, x6, x6; \
+ vpxor x2, x7, x7; /* note: high and low parts swapped */ \
+ \
+ /* Add key material and result to CD (x becomes new CD) */ \
+ \
+ vpxor t3, x4, x4; \
+ vpxor 0 * 16(mem_cd), x4, x4; \
+ \
+ vpxor t2, x5, x5; \
+ vpxor 1 * 16(mem_cd), x5, x5; \
+ \
+ vpsrldq $1, t5, t3; \
+ vpshufb t6, t5, t5; \
+ vpshufb t6, t3, t6; \
+ \
+ vpxor t1, x6, x6; \
+ vpxor 2 * 16(mem_cd), x6, x6; \
+ \
+ vpxor t0, x7, x7; \
+ vpxor 3 * 16(mem_cd), x7, x7; \
+ \
+ vpxor t7, x0, x0; \
+ vpxor 4 * 16(mem_cd), x0, x0; \
+ \
+ vpxor t6, x1, x1; \
+ vpxor 5 * 16(mem_cd), x1, x1; \
+ \
+ vpxor t5, x2, x2; \
+ vpxor 6 * 16(mem_cd), x2, x2; \
+ \
+ vpxor t4, x3, x3; \
+ vpxor 7 * 16(mem_cd), x3, x3;
+
+/*
+ * IN/OUT:
+ * x0..x7: byte-sliced AB state preloaded
+ * mem_ab: byte-sliced AB state in memory
+ * mem_cd: byte-sliced CD state in memory
+ */
+#define two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, i, dir, store_ab) \
+ roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_cd, (key_table + (i) * 8)(CTX)); \
+ \
+ vmovdqu x4, 0 * 16(mem_cd); \
+ vmovdqu x5, 1 * 16(mem_cd); \
+ vmovdqu x6, 2 * 16(mem_cd); \
+ vmovdqu x7, 3 * 16(mem_cd); \
+ vmovdqu x0, 4 * 16(mem_cd); \
+ vmovdqu x1, 5 * 16(mem_cd); \
+ vmovdqu x2, 6 * 16(mem_cd); \
+ vmovdqu x3, 7 * 16(mem_cd); \
+ \
+ roundsm16(x4, x5, x6, x7, x0, x1, x2, x3, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, (key_table + ((i) + (dir)) * 8)(CTX)); \
+ \
+ store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab);
+
+#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */
+
+#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \
+ /* Store new AB state */ \
+ vmovdqu x0, 0 * 16(mem_ab); \
+ vmovdqu x1, 1 * 16(mem_ab); \
+ vmovdqu x2, 2 * 16(mem_ab); \
+ vmovdqu x3, 3 * 16(mem_ab); \
+ vmovdqu x4, 4 * 16(mem_ab); \
+ vmovdqu x5, 5 * 16(mem_ab); \
+ vmovdqu x6, 6 * 16(mem_ab); \
+ vmovdqu x7, 7 * 16(mem_ab);
+
+#define enc_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, i) \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store);
+
+#define dec_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, i) \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store);
+
+/*
+ * IN:
+ * v0..3: byte-sliced 32-bit integers
+ * OUT:
+ * v0..3: (IN <<< 1)
+ */
+#define rol32_1_16(v0, v1, v2, v3, t0, t1, t2, zero) \
+ vpcmpgtb v0, zero, t0; \
+ vpaddb v0, v0, v0; \
+ vpabsb t0, t0; \
+ \
+ vpcmpgtb v1, zero, t1; \
+ vpaddb v1, v1, v1; \
+ vpabsb t1, t1; \
+ \
+ vpcmpgtb v2, zero, t2; \
+ vpaddb v2, v2, v2; \
+ vpabsb t2, t2; \
+ \
+ vpor t0, v1, v1; \
+ \
+ vpcmpgtb v3, zero, t0; \
+ vpaddb v3, v3, v3; \
+ vpabsb t0, t0; \
+ \
+ vpor t1, v2, v2; \
+ vpor t2, v3, v3; \
+ vpor t0, v0, v0;
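+
+/* For reference: the scalar equivalent of rol32_1_16 on each byte-sliced
+ * 32-bit word is (in << 1) | (in >> 31). Per byte slice, vpcmpgtb + vpabsb
+ * extract the old top bit as a carry, vpaddb shifts the slice left by one,
+ * and the carry is OR-ed into the next slice (wrapping from the last slice
+ * back to the first). */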
+
+/*
+ * IN:
+ * l: byte-sliced AB state in memory
+ * r: byte-sliced CD state in memory
+ * OUT:
+ * l0..l7: new byte-sliced AB state
+ */
+#define fls16(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \
+ tt1, tt2, tt3, kll, klr, krl, krr) \
+ /* \
+ * t0 = kll; \
+ * t0 &= ll; \
+ * lr ^= rol32(t0, 1); \
+ */ \
+ vpxor tt0, tt0, tt0; \
+ vmovd kll, t0; \
+ vpshufb tt0, t0, t3; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t2; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t1; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t0; \
+ \
+ vpand l0, t0, t0; \
+ vpand l1, t1, t1; \
+ vpand l2, t2, t2; \
+ vpand l3, t3, t3; \
+ \
+ rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
+ \
+ vpxor l4, t0, l4; \
+ vmovdqu l4, 4 * 16(l); \
+ vpxor l5, t1, l5; \
+ vmovdqu l5, 5 * 16(l); \
+ vpxor l6, t2, l6; \
+ vmovdqu l6, 6 * 16(l); \
+ vpxor l7, t3, l7; \
+ vmovdqu l7, 7 * 16(l); \
+ \
+ /* \
+ * t2 = krr; \
+ * t2 |= rr; \
+ * rl ^= t2; \
+ */ \
+ \
+ vmovd krr, t0; \
+ vpshufb tt0, t0, t3; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t2; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t1; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t0; \
+ \
+ vpor 4 * 16(r), t0, t0; \
+ vpor 5 * 16(r), t1, t1; \
+ vpor 6 * 16(r), t2, t2; \
+ vpor 7 * 16(r), t3, t3; \
+ \
+ vpxor 0 * 16(r), t0, t0; \
+ vpxor 1 * 16(r), t1, t1; \
+ vpxor 2 * 16(r), t2, t2; \
+ vpxor 3 * 16(r), t3, t3; \
+ vmovdqu t0, 0 * 16(r); \
+ vmovdqu t1, 1 * 16(r); \
+ vmovdqu t2, 2 * 16(r); \
+ vmovdqu t3, 3 * 16(r); \
+ \
+ /* \
+ * t2 = krl; \
+ * t2 &= rl; \
+ * rr ^= rol32(t2, 1); \
+ */ \
+ vmovd krl, t0; \
+ vpshufb tt0, t0, t3; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t2; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t1; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t0; \
+ \
+ vpand 0 * 16(r), t0, t0; \
+ vpand 1 * 16(r), t1, t1; \
+ vpand 2 * 16(r), t2, t2; \
+ vpand 3 * 16(r), t3, t3; \
+ \
+ rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
+ \
+ vpxor 4 * 16(r), t0, t0; \
+ vpxor 5 * 16(r), t1, t1; \
+ vpxor 6 * 16(r), t2, t2; \
+ vpxor 7 * 16(r), t3, t3; \
+ vmovdqu t0, 4 * 16(r); \
+ vmovdqu t1, 5 * 16(r); \
+ vmovdqu t2, 6 * 16(r); \
+ vmovdqu t3, 7 * 16(r); \
+ \
+ /* \
+ * t0 = klr; \
+ * t0 |= lr; \
+ * ll ^= t0; \
+ */ \
+ \
+ vmovd klr, t0; \
+ vpshufb tt0, t0, t3; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t2; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t1; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t0; \
+ \
+ vpor l4, t0, t0; \
+ vpor l5, t1, t1; \
+ vpor l6, t2, t2; \
+ vpor l7, t3, t3; \
+ \
+ vpxor l0, t0, l0; \
+ vmovdqu l0, 0 * 16(l); \
+ vpxor l1, t1, l1; \
+ vmovdqu l1, 1 * 16(l); \
+ vpxor l2, t2, l2; \
+ vmovdqu l2, 2 * 16(l); \
+ vpxor l3, t3, l3; \
+ vmovdqu l3, 3 * 16(l);
+
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+#define byteslice_16x16b_fast(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, \
+ a3, b3, c3, d3, st0, st1) \
+ vmovdqu d2, st0; \
+ vmovdqu d3, st1; \
+ transpose_4x4(a0, a1, a2, a3, d2, d3); \
+ transpose_4x4(b0, b1, b2, b3, d2, d3); \
+ vmovdqu st0, d2; \
+ vmovdqu st1, d3; \
+ \
+ vmovdqu a0, st0; \
+ vmovdqu a1, st1; \
+ transpose_4x4(c0, c1, c2, c3, a0, a1); \
+ transpose_4x4(d0, d1, d2, d3, a0, a1); \
+ \
+ vmovdqu .Lshufb_16x16b rRIP, a0; \
+ vmovdqu st1, a1; \
+ vpshufb a0, a2, a2; \
+ vpshufb a0, a3, a3; \
+ vpshufb a0, b0, b0; \
+ vpshufb a0, b1, b1; \
+ vpshufb a0, b2, b2; \
+ vpshufb a0, b3, b3; \
+ vpshufb a0, a1, a1; \
+ vpshufb a0, c0, c0; \
+ vpshufb a0, c1, c1; \
+ vpshufb a0, c2, c2; \
+ vpshufb a0, c3, c3; \
+ vpshufb a0, d0, d0; \
+ vpshufb a0, d1, d1; \
+ vpshufb a0, d2, d2; \
+ vpshufb a0, d3, d3; \
+ vmovdqu d3, st1; \
+ vmovdqu st0, d3; \
+ vpshufb a0, d3, a0; \
+ vmovdqu d2, st0; \
+ \
+ transpose_4x4(a0, b0, c0, d0, d2, d3); \
+ transpose_4x4(a1, b1, c1, d1, d2, d3); \
+ vmovdqu st0, d2; \
+ vmovdqu st1, d3; \
+ \
+ vmovdqu b0, st0; \
+ vmovdqu b1, st1; \
+ transpose_4x4(a2, b2, c2, d2, b0, b1); \
+ transpose_4x4(a3, b3, c3, d3, b0, b1); \
+ vmovdqu st0, b0; \
+ vmovdqu st1, b1; \
+ /* does not adjust output bytes inside vectors */
+
+#define transpose_8x8b(a, b, c, d, e, f, g, h, t0, t1, t2, t3, t4) \
+ vpunpcklbw a, b, t0; \
+ vpunpckhbw a, b, b; \
+ \
+ vpunpcklbw c, d, t1; \
+ vpunpckhbw c, d, d; \
+ \
+ vpunpcklbw e, f, t2; \
+ vpunpckhbw e, f, f; \
+ \
+ vpunpcklbw g, h, t3; \
+ vpunpckhbw g, h, h; \
+ \
+ vpunpcklwd t0, t1, g; \
+ vpunpckhwd t0, t1, t0; \
+ \
+ vpunpcklwd b, d, t1; \
+ vpunpckhwd b, d, e; \
+ \
+ vpunpcklwd t2, t3, c; \
+ vpunpckhwd t2, t3, t2; \
+ \
+ vpunpcklwd f, h, t3; \
+ vpunpckhwd f, h, b; \
+ \
+ vpunpcklwd e, b, t4; \
+ vpunpckhwd e, b, b; \
+ \
+ vpunpcklwd t1, t3, e; \
+ vpunpckhwd t1, t3, f; \
+ \
+ vmovdqa .Ltranspose_8x8_shuf rRIP, t3; \
+ \
+ vpunpcklwd g, c, d; \
+ vpunpckhwd g, c, c; \
+ \
+ vpunpcklwd t0, t2, t1; \
+ vpunpckhwd t0, t2, h; \
+ \
+ vpunpckhqdq b, h, a; \
+ vpshufb t3, a, a; \
+ vpunpcklqdq b, h, b; \
+ vpshufb t3, b, b; \
+ \
+ vpunpckhqdq e, d, g; \
+ vpshufb t3, g, g; \
+ vpunpcklqdq e, d, h; \
+ vpshufb t3, h, h; \
+ \
+ vpunpckhqdq f, c, e; \
+ vpshufb t3, e, e; \
+ vpunpcklqdq f, c, f; \
+ vpshufb t3, f, f; \
+ \
+ vpunpckhqdq t4, t1, c; \
+ vpshufb t3, c, c; \
+ vpunpcklqdq t4, t1, d; \
+ vpshufb t3, d, d;
+
+/* load blocks to registers and apply pre-whitening */
+#define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, rio, key) \
+ vmovq key, x0; \
+ vpshufb .Lpack_bswap rRIP, x0, x0; \
+ \
+ vpxor 0 * 16(rio), x0, y7; \
+ vpxor 1 * 16(rio), x0, y6; \
+ vpxor 2 * 16(rio), x0, y5; \
+ vpxor 3 * 16(rio), x0, y4; \
+ vpxor 4 * 16(rio), x0, y3; \
+ vpxor 5 * 16(rio), x0, y2; \
+ vpxor 6 * 16(rio), x0, y1; \
+ vpxor 7 * 16(rio), x0, y0; \
+ vpxor 8 * 16(rio), x0, x7; \
+ vpxor 9 * 16(rio), x0, x6; \
+ vpxor 10 * 16(rio), x0, x5; \
+ vpxor 11 * 16(rio), x0, x4; \
+ vpxor 12 * 16(rio), x0, x3; \
+ vpxor 13 * 16(rio), x0, x2; \
+ vpxor 14 * 16(rio), x0, x1; \
+ vpxor 15 * 16(rio), x0, x0;
+
+/* byteslice pre-whitened blocks and store to temporary memory */
+#define inpack16_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd) \
+ byteslice_16x16b_fast(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, \
+ y4, y5, y6, y7, (mem_ab), (mem_cd)); \
+ \
+ vmovdqu x0, 0 * 16(mem_ab); \
+ vmovdqu x1, 1 * 16(mem_ab); \
+ vmovdqu x2, 2 * 16(mem_ab); \
+ vmovdqu x3, 3 * 16(mem_ab); \
+ vmovdqu x4, 4 * 16(mem_ab); \
+ vmovdqu x5, 5 * 16(mem_ab); \
+ vmovdqu x6, 6 * 16(mem_ab); \
+ vmovdqu x7, 7 * 16(mem_ab); \
+ vmovdqu y0, 0 * 16(mem_cd); \
+ vmovdqu y1, 1 * 16(mem_cd); \
+ vmovdqu y2, 2 * 16(mem_cd); \
+ vmovdqu y3, 3 * 16(mem_cd); \
+ vmovdqu y4, 4 * 16(mem_cd); \
+ vmovdqu y5, 5 * 16(mem_cd); \
+ vmovdqu y6, 6 * 16(mem_cd); \
+ vmovdqu y7, 7 * 16(mem_cd);
+
+/* de-byteslice, apply post-whitening and store blocks */
+#define outunpack16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \
+ y5, y6, y7, key, stack_tmp0, stack_tmp1) \
+ byteslice_16x16b_fast(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, \
+ y3, y7, x3, x7, stack_tmp0, stack_tmp1); \
+ \
+ vmovdqu x0, stack_tmp0; \
+ \
+ vmovq key, x0; \
+ vpshufb .Lpack_bswap rRIP, x0, x0; \
+ \
+ vpxor x0, y7, y7; \
+ vpxor x0, y6, y6; \
+ vpxor x0, y5, y5; \
+ vpxor x0, y4, y4; \
+ vpxor x0, y3, y3; \
+ vpxor x0, y2, y2; \
+ vpxor x0, y1, y1; \
+ vpxor x0, y0, y0; \
+ vpxor x0, x7, x7; \
+ vpxor x0, x6, x6; \
+ vpxor x0, x5, x5; \
+ vpxor x0, x4, x4; \
+ vpxor x0, x3, x3; \
+ vpxor x0, x2, x2; \
+ vpxor x0, x1, x1; \
+ vpxor stack_tmp0, x0, x0;
+
+#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, rio) \
+ vmovdqu x0, 0 * 16(rio); \
+ vmovdqu x1, 1 * 16(rio); \
+ vmovdqu x2, 2 * 16(rio); \
+ vmovdqu x3, 3 * 16(rio); \
+ vmovdqu x4, 4 * 16(rio); \
+ vmovdqu x5, 5 * 16(rio); \
+ vmovdqu x6, 6 * 16(rio); \
+ vmovdqu x7, 7 * 16(rio); \
+ vmovdqu y0, 8 * 16(rio); \
+ vmovdqu y1, 9 * 16(rio); \
+ vmovdqu y2, 10 * 16(rio); \
+ vmovdqu y3, 11 * 16(rio); \
+ vmovdqu y4, 12 * 16(rio); \
+ vmovdqu y5, 13 * 16(rio); \
+ vmovdqu y6, 14 * 16(rio); \
+ vmovdqu y7, 15 * 16(rio);
+
+.text
+.align 16
+
+#define SHUFB_BYTES(idx) \
+ 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
+
+.Lshufb_16x16b:
+ .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3);
+
+.Lpack_bswap:
+ .long 0x00010203
+ .long 0x04050607
+ .long 0x80808080
+ .long 0x80808080
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/*
+ * pre-SubByte transform
+ *
+ * pre-lookup for sbox1, sbox2, sbox3:
+ * swap_bitendianness(
+ * isom_map_camellia_to_aes(
+ * camellia_f(
+ * swap_bitendianness(in)
+ * )
+ * )
+ * )
+ *
+ * (note: '⊕ 0xc5' inside camellia_f())
+ */
+.Lpre_tf_lo_s1:
+ .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86
+ .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88
+.Lpre_tf_hi_s1:
+ .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a
+ .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23
+
+/*
+ * pre-SubByte transform
+ *
+ * pre-lookup for sbox4:
+ * swap_bitendianness(
+ * isom_map_camellia_to_aes(
+ * camellia_f(
+ * swap_bitendianness(in <<< 1)
+ * )
+ * )
+ * )
+ *
+ * (note: '⊕ 0xc5' inside camellia_f())
+ */
+.Lpre_tf_lo_s4:
+ .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25
+ .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74
+.Lpre_tf_hi_s4:
+ .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72
+ .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox1, sbox4:
+ * swap_bitendianness(
+ * camellia_h(
+ * isom_map_aes_to_camellia(
+ * swap_bitendianness(
+ * aes_inverse_affine_transform(in)
+ * )
+ * )
+ * )
+ * )
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s1:
+ .byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31
+ .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1
+.Lpost_tf_hi_s1:
+ .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8
+ .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox2:
+ * swap_bitendianness(
+ * camellia_h(
+ * isom_map_aes_to_camellia(
+ * swap_bitendianness(
+ * aes_inverse_affine_transform(in)
+ * )
+ * )
+ * )
+ * ) <<< 1
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s2:
+ .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62
+ .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3
+.Lpost_tf_hi_s2:
+ .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51
+ .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox3:
+ * swap_bitendianness(
+ * camellia_h(
+ * isom_map_aes_to_camellia(
+ * swap_bitendianness(
+ * aes_inverse_affine_transform(in)
+ * )
+ * )
+ * )
+ * ) >>> 1
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s3:
+ .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98
+ .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8
+.Lpost_tf_hi_s3:
+ .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54
+ .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+ .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+ .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
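+
+/* For reference: vaesenclast performs ShiftRows, SubBytes and AddRoundKey.
+ * Shuffling the input with this inverse ShiftRows mask beforehand and using
+ * an all-zero round key leaves only the SubBytes step, which is how the
+ * round macros above reuse the AES S-box for Camellia's S-functions. */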
+
+/* shuffle mask for 8x8 byte transpose */
+.Ltranspose_8x8_shuf:
+ .byte 0, 1, 4, 5, 2, 3, 6, 7, 8+0, 8+1, 8+4, 8+5, 8+2, 8+3, 8+6, 8+7
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+ .long 0x0f0f0f0f
+
+
+.align 8
+ELF(.type __camellia_enc_blk16,@function;)
+
+__camellia_enc_blk16:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rax: temporary storage, 256 bytes
+ * %r8d: 24 for 16 byte key, 32 for larger
+ * %xmm0..%xmm15: 16 plaintext blocks
+ * output:
+ * %xmm0..%xmm15: 16 encrypted blocks, order swapped:
+ * 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
+ */
+ CFI_STARTPROC();
+
+ leaq 8 * 16(%rax), %rcx;
+
+ leaq (-8 * 8)(CTX, %r8, 8), %r8;
+
+ inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx);
+
+.align 8
+.Lenc_loop:
+ enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx, 0);
+
+ cmpq %r8, CTX;
+ je .Lenc_done;
+ leaq (8 * 8)(CTX), CTX;
+
+ fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15,
+ ((key_table) + 0)(CTX),
+ ((key_table) + 4)(CTX),
+ ((key_table) + 8)(CTX),
+ ((key_table) + 12)(CTX));
+ jmp .Lenc_loop;
+
+.align 8
+.Lenc_done:
+ /* load CD for output */
+ vmovdqu 0 * 16(%rcx), %xmm8;
+ vmovdqu 1 * 16(%rcx), %xmm9;
+ vmovdqu 2 * 16(%rcx), %xmm10;
+ vmovdqu 3 * 16(%rcx), %xmm11;
+ vmovdqu 4 * 16(%rcx), %xmm12;
+ vmovdqu 5 * 16(%rcx), %xmm13;
+ vmovdqu 6 * 16(%rcx), %xmm14;
+ vmovdqu 7 * 16(%rcx), %xmm15;
+
+ outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, ((key_table) + 8 * 8)(%r8), (%rax), 1 * 16(%rax));
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __camellia_enc_blk16,.-__camellia_enc_blk16;)
+
+.align 8
+ELF(.type __camellia_dec_blk16,@function;)
+
+__camellia_dec_blk16:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rax: temporary storage, 256 bytes
+ * %r8d: 24 for 16 byte key, 32 for larger
+ * %xmm0..%xmm15: 16 encrypted blocks
+ * output:
+ * %xmm0..%xmm15: 16 plaintext blocks, order swapped:
+ * 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
+ */
+ CFI_STARTPROC();
+
+ movq %r8, %rcx;
+ movq CTX, %r8
+ leaq (-8 * 8)(CTX, %rcx, 8), CTX;
+
+ leaq 8 * 16(%rax), %rcx;
+
+ inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx);
+
+.align 8
+.Ldec_loop:
+ dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx, 0);
+
+ cmpq %r8, CTX;
+ je .Ldec_done;
+
+ fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15,
+ ((key_table) + 8)(CTX),
+ ((key_table) + 12)(CTX),
+ ((key_table) + 0)(CTX),
+ ((key_table) + 4)(CTX));
+
+ leaq (-8 * 8)(CTX), CTX;
+ jmp .Ldec_loop;
+
+.align 8
+.Ldec_done:
+ /* load CD for output */
+ vmovdqu 0 * 16(%rcx), %xmm8;
+ vmovdqu 1 * 16(%rcx), %xmm9;
+ vmovdqu 2 * 16(%rcx), %xmm10;
+ vmovdqu 3 * 16(%rcx), %xmm11;
+ vmovdqu 4 * 16(%rcx), %xmm12;
+ vmovdqu 5 * 16(%rcx), %xmm13;
+ vmovdqu 6 * 16(%rcx), %xmm14;
+ vmovdqu 7 * 16(%rcx), %xmm15;
+
+ outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax));
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __camellia_dec_blk16,.-__camellia_dec_blk16;)
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
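+
+/* For reference: inc_le128 adds one to a 128-bit little-endian counter held
+ * in an xmm register; the scalar form is
+ *   lo += 1;  if (lo == 0) hi += 1;
+ * 'minus_one' is expected to hold -1 in the low quadword and 0 in the high
+ * quadword; the vpcmpeqq/vpslldq/vpsubq sequence propagates the carry into
+ * the high quadword without a branch. */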
+
+.align 8
+.globl _gcry_camellia_aesni_avx_ctr_enc
+ELF(.type _gcry_camellia_aesni_avx_ctr_enc,@function;)
+
+_gcry_camellia_aesni_avx_ctr_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %eax;
+ cmovel %eax, %r8d; /* max */
+
+ subq $(16 * 16), %rsp;
+ andq $~31, %rsp;
+ movq %rsp, %rax;
+
+ vmovdqa .Lbswap128_mask rRIP, %xmm14;
+
+ /* load IV and byteswap */
+ vmovdqu (%rcx), %xmm15;
+ vmovdqu %xmm15, 15 * 16(%rax);
+ vpshufb %xmm14, %xmm15, %xmm0; /* be => le */
+
+ vpcmpeqd %xmm15, %xmm15, %xmm15;
+ vpsrldq $8, %xmm15, %xmm15; /* low: -1, high: 0 */
+
+ /* construct IVs */
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm13;
+ vmovdqu %xmm13, 14 * 16(%rax);
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm13;
+ vmovdqu %xmm13, 13 * 16(%rax);
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm12;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm11;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm10;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm9;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm8;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm7;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm6;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm5;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm4;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm3;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm2;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm1;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vmovdqa %xmm0, %xmm13;
+ vpshufb %xmm14, %xmm0, %xmm0;
+ inc_le128(%xmm13, %xmm15, %xmm14);
+ vpshufb .Lbswap128_mask rRIP, %xmm13, %xmm13; /* le => be */
+ vmovdqu %xmm13, (%rcx);
+
+ /* inpack16_pre: */
+ vmovq (key_table)(CTX), %xmm15;
+ vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
+ vpxor %xmm0, %xmm15, %xmm0;
+ vpxor %xmm1, %xmm15, %xmm1;
+ vpxor %xmm2, %xmm15, %xmm2;
+ vpxor %xmm3, %xmm15, %xmm3;
+ vpxor %xmm4, %xmm15, %xmm4;
+ vpxor %xmm5, %xmm15, %xmm5;
+ vpxor %xmm6, %xmm15, %xmm6;
+ vpxor %xmm7, %xmm15, %xmm7;
+ vpxor %xmm8, %xmm15, %xmm8;
+ vpxor %xmm9, %xmm15, %xmm9;
+ vpxor %xmm10, %xmm15, %xmm10;
+ vpxor %xmm11, %xmm15, %xmm11;
+ vpxor %xmm12, %xmm15, %xmm12;
+ vpxor 13 * 16(%rax), %xmm15, %xmm13;
+ vpxor 14 * 16(%rax), %xmm15, %xmm14;
+ vpxor 15 * 16(%rax), %xmm15, %xmm15;
+
+ call __camellia_enc_blk16;
+
+ vpxor 0 * 16(%rdx), %xmm7, %xmm7;
+ vpxor 1 * 16(%rdx), %xmm6, %xmm6;
+ vpxor 2 * 16(%rdx), %xmm5, %xmm5;
+ vpxor 3 * 16(%rdx), %xmm4, %xmm4;
+ vpxor 4 * 16(%rdx), %xmm3, %xmm3;
+ vpxor 5 * 16(%rdx), %xmm2, %xmm2;
+ vpxor 6 * 16(%rdx), %xmm1, %xmm1;
+ vpxor 7 * 16(%rdx), %xmm0, %xmm0;
+ vpxor 8 * 16(%rdx), %xmm15, %xmm15;
+ vpxor 9 * 16(%rdx), %xmm14, %xmm14;
+ vpxor 10 * 16(%rdx), %xmm13, %xmm13;
+ vpxor 11 * 16(%rdx), %xmm12, %xmm12;
+ vpxor 12 * 16(%rdx), %xmm11, %xmm11;
+ vpxor 13 * 16(%rdx), %xmm10, %xmm10;
+ vpxor 14 * 16(%rdx), %xmm9, %xmm9;
+ vpxor 15 * 16(%rdx), %xmm8, %xmm8;
+
+ write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+ %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+ %xmm8, %rsi);
+
+ vzeroall;
+
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;)
+
+.align 8
+.globl _gcry_camellia_aesni_avx_cbc_dec
+ELF(.type _gcry_camellia_aesni_avx_cbc_dec,@function;)
+
+_gcry_camellia_aesni_avx_cbc_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ movq %rcx, %r9;
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %eax;
+ cmovel %eax, %r8d; /* max */
+
+ inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rdx, (key_table)(CTX, %r8, 8));
+
+ subq $(16 * 16), %rsp;
+ andq $~31, %rsp;
+ movq %rsp, %rax;
+
+ call __camellia_dec_blk16;
+
+ /* XOR output with IV */
+ vpxor (%r9), %xmm7, %xmm7;
+ vpxor (0 * 16)(%rdx), %xmm6, %xmm6;
+ vpxor (1 * 16)(%rdx), %xmm5, %xmm5;
+ vpxor (2 * 16)(%rdx), %xmm4, %xmm4;
+ vpxor (3 * 16)(%rdx), %xmm3, %xmm3;
+ vpxor (4 * 16)(%rdx), %xmm2, %xmm2;
+ vpxor (5 * 16)(%rdx), %xmm1, %xmm1;
+ vpxor (6 * 16)(%rdx), %xmm0, %xmm0;
+ vpxor (7 * 16)(%rdx), %xmm15, %xmm15;
+ vpxor (8 * 16)(%rdx), %xmm14, %xmm14;
+ vpxor (9 * 16)(%rdx), %xmm13, %xmm13;
+ vpxor (10 * 16)(%rdx), %xmm12, %xmm12;
+ vpxor (11 * 16)(%rdx), %xmm11, %xmm11;
+ vpxor (12 * 16)(%rdx), %xmm10, %xmm10;
+ vpxor (13 * 16)(%rdx), %xmm9, %xmm9;
+ vpxor (14 * 16)(%rdx), %xmm8, %xmm8;
+ movq (15 * 16 + 0)(%rdx), %r10;
+ movq (15 * 16 + 8)(%rdx), %r11;
+
+ write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+ %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+ %xmm8, %rsi);
+
+ /* store new IV */
+ movq %r10, (0)(%r9);
+ movq %r11, (8)(%r9);
+
+ vzeroall;
+
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;)
+
+.align 8
+.globl _gcry_camellia_aesni_avx_cfb_dec
+ELF(.type _gcry_camellia_aesni_avx_cfb_dec,@function;)
+
+_gcry_camellia_aesni_avx_cfb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %eax;
+ cmovel %eax, %r8d; /* max */
+
+ subq $(16 * 16), %rsp;
+ andq $~31, %rsp;
+ movq %rsp, %rax;
+
+ /* inpack16_pre: */
+ vmovq (key_table)(CTX), %xmm0;
+ vpshufb .Lpack_bswap rRIP, %xmm0, %xmm0;
+ vpxor (%rcx), %xmm0, %xmm15;
+ vmovdqu 15 * 16(%rdx), %xmm1;
+ vmovdqu %xmm1, (%rcx); /* store new IV */
+ vpxor 0 * 16(%rdx), %xmm0, %xmm14;
+ vpxor 1 * 16(%rdx), %xmm0, %xmm13;
+ vpxor 2 * 16(%rdx), %xmm0, %xmm12;
+ vpxor 3 * 16(%rdx), %xmm0, %xmm11;
+ vpxor 4 * 16(%rdx), %xmm0, %xmm10;
+ vpxor 5 * 16(%rdx), %xmm0, %xmm9;
+ vpxor 6 * 16(%rdx), %xmm0, %xmm8;
+ vpxor 7 * 16(%rdx), %xmm0, %xmm7;
+ vpxor 8 * 16(%rdx), %xmm0, %xmm6;
+ vpxor 9 * 16(%rdx), %xmm0, %xmm5;
+ vpxor 10 * 16(%rdx), %xmm0, %xmm4;
+ vpxor 11 * 16(%rdx), %xmm0, %xmm3;
+ vpxor 12 * 16(%rdx), %xmm0, %xmm2;
+ vpxor 13 * 16(%rdx), %xmm0, %xmm1;
+ vpxor 14 * 16(%rdx), %xmm0, %xmm0;
+
+ call __camellia_enc_blk16;
+
+ vpxor 0 * 16(%rdx), %xmm7, %xmm7;
+ vpxor 1 * 16(%rdx), %xmm6, %xmm6;
+ vpxor 2 * 16(%rdx), %xmm5, %xmm5;
+ vpxor 3 * 16(%rdx), %xmm4, %xmm4;
+ vpxor 4 * 16(%rdx), %xmm3, %xmm3;
+ vpxor 5 * 16(%rdx), %xmm2, %xmm2;
+ vpxor 6 * 16(%rdx), %xmm1, %xmm1;
+ vpxor 7 * 16(%rdx), %xmm0, %xmm0;
+ vpxor 8 * 16(%rdx), %xmm15, %xmm15;
+ vpxor 9 * 16(%rdx), %xmm14, %xmm14;
+ vpxor 10 * 16(%rdx), %xmm13, %xmm13;
+ vpxor 11 * 16(%rdx), %xmm12, %xmm12;
+ vpxor 12 * 16(%rdx), %xmm11, %xmm11;
+ vpxor 13 * 16(%rdx), %xmm10, %xmm10;
+ vpxor 14 * 16(%rdx), %xmm9, %xmm9;
+ vpxor 15 * 16(%rdx), %xmm8, %xmm8;
+
+ write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+ %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+ %xmm8, %rsi);
+
+ vzeroall;
+
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;)
+
+.align 8
+.globl _gcry_camellia_aesni_avx_ocb_enc
+ELF(.type _gcry_camellia_aesni_avx_ocb_enc,@function;)
+
+_gcry_camellia_aesni_avx_ocb_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[16])
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ subq $(16 * 16 + 4 * 8), %rsp;
+ andq $~31, %rsp;
+ movq %rsp, %rax;
+
+ movq %r10, (16 * 16 + 0 * 8)(%rsp);
+ movq %r11, (16 * 16 + 1 * 8)(%rsp);
+ movq %r12, (16 * 16 + 2 * 8)(%rsp);
+ movq %r13, (16 * 16 + 3 * 8)(%rsp);
+ CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8);
+ CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8);
+ CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8);
+ CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8);
+
+ vmovdqu (%rcx), %xmm14;
+ vmovdqu (%r8), %xmm15;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
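+
+ /* For reference, the per-block form of the OCB_INPUT macro below
+  * (names illustrative):
+  *   offset   ^= L[ntz(i)];      // xmm14
+  *   checksum ^= P[i];           // xmm15
+  *   x[i]      = P[i] ^ offset;  // block fed to the cipher
+  *   dst[i]    = offset;         // xored with E_K(x[i]) after encryption
+  */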
+
+#define OCB_INPUT(n, lreg, xreg) \
+ vmovdqu (n * 16)(%rdx), xreg; \
+ vpxor (lreg), %xmm14, %xmm14; \
+ vpxor xreg, %xmm15, %xmm15; \
+ vpxor xreg, %xmm14, xreg; \
+ vmovdqu %xmm14, (n * 16)(%rsi);
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, %xmm0);
+ vmovdqu %xmm0, (15 * 16)(%rax);
+ OCB_INPUT(1, %r11, %xmm0);
+ vmovdqu %xmm0, (14 * 16)(%rax);
+ OCB_INPUT(2, %r12, %xmm13);
+ OCB_INPUT(3, %r13, %xmm12);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, %xmm11);
+ OCB_INPUT(5, %r11, %xmm10);
+ OCB_INPUT(6, %r12, %xmm9);
+ OCB_INPUT(7, %r13, %xmm8);
+ movq (8 * 8)(%r9), %r10;
+ movq (9 * 8)(%r9), %r11;
+ movq (10 * 8)(%r9), %r12;
+ movq (11 * 8)(%r9), %r13;
+ OCB_INPUT(8, %r10, %xmm7);
+ OCB_INPUT(9, %r11, %xmm6);
+ OCB_INPUT(10, %r12, %xmm5);
+ OCB_INPUT(11, %r13, %xmm4);
+ movq (12 * 8)(%r9), %r10;
+ movq (13 * 8)(%r9), %r11;
+ movq (14 * 8)(%r9), %r12;
+ movq (15 * 8)(%r9), %r13;
+ OCB_INPUT(12, %r10, %xmm3);
+ OCB_INPUT(13, %r11, %xmm2);
+ OCB_INPUT(14, %r12, %xmm1);
+ OCB_INPUT(15, %r13, %xmm0);
+#undef OCB_INPUT
+
+ vmovdqu %xmm14, (%rcx);
+ vmovdqu %xmm15, (%r8);
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %r10d;
+ cmovel %r10d, %r8d; /* max */
+
+ /* inpack16_pre: */
+ vmovq (key_table)(CTX), %xmm15;
+ vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
+ vpxor %xmm0, %xmm15, %xmm0;
+ vpxor %xmm1, %xmm15, %xmm1;
+ vpxor %xmm2, %xmm15, %xmm2;
+ vpxor %xmm3, %xmm15, %xmm3;
+ vpxor %xmm4, %xmm15, %xmm4;
+ vpxor %xmm5, %xmm15, %xmm5;
+ vpxor %xmm6, %xmm15, %xmm6;
+ vpxor %xmm7, %xmm15, %xmm7;
+ vpxor %xmm8, %xmm15, %xmm8;
+ vpxor %xmm9, %xmm15, %xmm9;
+ vpxor %xmm10, %xmm15, %xmm10;
+ vpxor %xmm11, %xmm15, %xmm11;
+ vpxor %xmm12, %xmm15, %xmm12;
+ vpxor %xmm13, %xmm15, %xmm13;
+ vpxor 14 * 16(%rax), %xmm15, %xmm14;
+ vpxor 15 * 16(%rax), %xmm15, %xmm15;
+
+ call __camellia_enc_blk16;
+
+ vpxor 0 * 16(%rsi), %xmm7, %xmm7;
+ vpxor 1 * 16(%rsi), %xmm6, %xmm6;
+ vpxor 2 * 16(%rsi), %xmm5, %xmm5;
+ vpxor 3 * 16(%rsi), %xmm4, %xmm4;
+ vpxor 4 * 16(%rsi), %xmm3, %xmm3;
+ vpxor 5 * 16(%rsi), %xmm2, %xmm2;
+ vpxor 6 * 16(%rsi), %xmm1, %xmm1;
+ vpxor 7 * 16(%rsi), %xmm0, %xmm0;
+ vpxor 8 * 16(%rsi), %xmm15, %xmm15;
+ vpxor 9 * 16(%rsi), %xmm14, %xmm14;
+ vpxor 10 * 16(%rsi), %xmm13, %xmm13;
+ vpxor 11 * 16(%rsi), %xmm12, %xmm12;
+ vpxor 12 * 16(%rsi), %xmm11, %xmm11;
+ vpxor 13 * 16(%rsi), %xmm10, %xmm10;
+ vpxor 14 * 16(%rsi), %xmm9, %xmm9;
+ vpxor 15 * 16(%rsi), %xmm8, %xmm8;
+
+ write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+ %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+ %xmm8, %rsi);
+
+ vzeroall;
+
+ movq (16 * 16 + 0 * 8)(%rsp), %r10;
+ movq (16 * 16 + 1 * 8)(%rsp), %r11;
+ movq (16 * 16 + 2 * 8)(%rsp), %r12;
+ movq (16 * 16 + 3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_ocb_enc,.-_gcry_camellia_aesni_avx_ocb_enc;)
+
+.align 8
+.globl _gcry_camellia_aesni_avx_ocb_dec
+ELF(.type _gcry_camellia_aesni_avx_ocb_dec,@function;)
+
+_gcry_camellia_aesni_avx_ocb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[16])
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ subq $(16 * 16 + 4 * 8), %rsp;
+ andq $~31, %rsp;
+ movq %rsp, %rax;
+
+ movq %r10, (16 * 16 + 0 * 8)(%rsp);
+ movq %r11, (16 * 16 + 1 * 8)(%rsp);
+ movq %r12, (16 * 16 + 2 * 8)(%rsp);
+ movq %r13, (16 * 16 + 3 * 8)(%rsp);
+ CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8);
+ CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8);
+ CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8);
+ CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8);
+
+ vmovdqu (%rcx), %xmm15;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+
+#define OCB_INPUT(n, lreg, xreg) \
+ vmovdqu (n * 16)(%rdx), xreg; \
+ vpxor (lreg), %xmm15, %xmm15; \
+ vpxor xreg, %xmm15, xreg; \
+ vmovdqu %xmm15, (n * 16)(%rsi);
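+ /* OCB_INPUT(n, lreg, xreg): load ciphertext block n, advance the
+ * running offset in %xmm15 by the L value at lreg, mask the block
+ * with the new offset, and park Offset_n at dst[n] so it can be
+ * xored back in after __camellia_dec_blk16 to finish
+ * P_n = Offset_n xor DECIPHER(K, C_n xor Offset_n). */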
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, %xmm0);
+ vmovdqu %xmm0, (15 * 16)(%rax);
+ OCB_INPUT(1, %r11, %xmm14);
+ OCB_INPUT(2, %r12, %xmm13);
+ OCB_INPUT(3, %r13, %xmm12);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, %xmm11);
+ OCB_INPUT(5, %r11, %xmm10);
+ OCB_INPUT(6, %r12, %xmm9);
+ OCB_INPUT(7, %r13, %xmm8);
+ movq (8 * 8)(%r9), %r10;
+ movq (9 * 8)(%r9), %r11;
+ movq (10 * 8)(%r9), %r12;
+ movq (11 * 8)(%r9), %r13;
+ OCB_INPUT(8, %r10, %xmm7);
+ OCB_INPUT(9, %r11, %xmm6);
+ OCB_INPUT(10, %r12, %xmm5);
+ OCB_INPUT(11, %r13, %xmm4);
+ movq (12 * 8)(%r9), %r10;
+ movq (13 * 8)(%r9), %r11;
+ movq (14 * 8)(%r9), %r12;
+ movq (15 * 8)(%r9), %r13;
+ OCB_INPUT(12, %r10, %xmm3);
+ OCB_INPUT(13, %r11, %xmm2);
+ OCB_INPUT(14, %r12, %xmm1);
+ OCB_INPUT(15, %r13, %xmm0);
+#undef OCB_INPUT
+
+ vmovdqu %xmm15, (%rcx);
+
+ movq %r8, %r10;
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %r9d;
+ cmovel %r9d, %r8d; /* max */
+
+ /* inpack16_pre: */
+ vmovq (key_table)(CTX, %r8, 8), %xmm15;
+ vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
+ vpxor %xmm0, %xmm15, %xmm0;
+ vpxor %xmm1, %xmm15, %xmm1;
+ vpxor %xmm2, %xmm15, %xmm2;
+ vpxor %xmm3, %xmm15, %xmm3;
+ vpxor %xmm4, %xmm15, %xmm4;
+ vpxor %xmm5, %xmm15, %xmm5;
+ vpxor %xmm6, %xmm15, %xmm6;
+ vpxor %xmm7, %xmm15, %xmm7;
+ vpxor %xmm8, %xmm15, %xmm8;
+ vpxor %xmm9, %xmm15, %xmm9;
+ vpxor %xmm10, %xmm15, %xmm10;
+ vpxor %xmm11, %xmm15, %xmm11;
+ vpxor %xmm12, %xmm15, %xmm12;
+ vpxor %xmm13, %xmm15, %xmm13;
+ vpxor %xmm14, %xmm15, %xmm14;
+ vpxor 15 * 16(%rax), %xmm15, %xmm15;
+
+ call __camellia_dec_blk16;
+
+ vpxor 0 * 16(%rsi), %xmm7, %xmm7;
+ vpxor 1 * 16(%rsi), %xmm6, %xmm6;
+ vpxor 2 * 16(%rsi), %xmm5, %xmm5;
+ vpxor 3 * 16(%rsi), %xmm4, %xmm4;
+ vpxor 4 * 16(%rsi), %xmm3, %xmm3;
+ vpxor 5 * 16(%rsi), %xmm2, %xmm2;
+ vpxor 6 * 16(%rsi), %xmm1, %xmm1;
+ vpxor 7 * 16(%rsi), %xmm0, %xmm0;
+ vmovdqu %xmm7, (7 * 16)(%rax);
+ vpxor 8 * 16(%rsi), %xmm15, %xmm15;
+ vpxor 9 * 16(%rsi), %xmm14, %xmm14;
+ vpxor 10 * 16(%rsi), %xmm13, %xmm13;
+ vpxor 11 * 16(%rsi), %xmm12, %xmm12;
+ vpxor 12 * 16(%rsi), %xmm11, %xmm11;
+ vpxor 13 * 16(%rsi), %xmm10, %xmm10;
+ vpxor 14 * 16(%rsi), %xmm9, %xmm9;
+ vpxor 15 * 16(%rsi), %xmm8, %xmm8;
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+
+ vpxor (%r10), %xmm7, %xmm7;
+ vpxor %xmm6, %xmm7, %xmm7;
+ vpxor %xmm5, %xmm7, %xmm7;
+ vpxor %xmm4, %xmm7, %xmm7;
+ vpxor %xmm3, %xmm7, %xmm7;
+ vpxor %xmm2, %xmm7, %xmm7;
+ vpxor %xmm1, %xmm7, %xmm7;
+ vpxor %xmm0, %xmm7, %xmm7;
+ vpxor %xmm15, %xmm7, %xmm7;
+ vpxor %xmm14, %xmm7, %xmm7;
+ vpxor %xmm13, %xmm7, %xmm7;
+ vpxor %xmm12, %xmm7, %xmm7;
+ vpxor %xmm11, %xmm7, %xmm7;
+ vpxor %xmm10, %xmm7, %xmm7;
+ vpxor %xmm9, %xmm7, %xmm7;
+ vpxor %xmm8, %xmm7, %xmm7;
+ vmovdqu %xmm7, (%r10);
+ vmovdqu (7 * 16)(%rax), %xmm7;
+
+ write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+ %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+ %xmm8, %rsi);
+
+ vzeroall;
+
+ movq (16 * 16 + 0 * 8)(%rsp), %r10;
+ movq (16 * 16 + 1 * 8)(%rsp), %r11;
+ movq (16 * 16 + 2 * 8)(%rsp), %r12;
+ movq (16 * 16 + 3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_ocb_dec,.-_gcry_camellia_aesni_avx_ocb_dec;)
+
+.align 8
+.globl _gcry_camellia_aesni_avx_ocb_auth
+ELF(.type _gcry_camellia_aesni_avx_ocb_auth,@function;)
+
+_gcry_camellia_aesni_avx_ocb_auth:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: abuf (16 blocks)
+ * %rdx: offset
+ * %rcx: checksum
+ * %r8 : L pointers (void *L[16])
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ subq $(16 * 16 + 4 * 8), %rsp;
+ andq $~31, %rsp;
+ movq %rsp, %rax;
+
+ movq %r10, (16 * 16 + 0 * 8)(%rsp);
+ movq %r11, (16 * 16 + 1 * 8)(%rsp);
+ movq %r12, (16 * 16 + 2 * 8)(%rsp);
+ movq %r13, (16 * 16 + 3 * 8)(%rsp);
+ CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8);
+ CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8);
+ CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8);
+ CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8);
+
+ vmovdqu (%rdx), %xmm15;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+
+#define OCB_INPUT(n, lreg, xreg) \
+ vmovdqu (n * 16)(%rsi), xreg; \
+ vpxor (lreg), %xmm15, %xmm15; \
+ vpxor xreg, %xmm15, xreg;
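+ /* Unlike the encrypt/decrypt paths, OCB_INPUT keeps nothing per block
+ * here: only ENCIPHER(K, A_i xor Offset_i) is needed for the tag sum. */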
+
+ movq (0 * 8)(%r8), %r10;
+ movq (1 * 8)(%r8), %r11;
+ movq (2 * 8)(%r8), %r12;
+ movq (3 * 8)(%r8), %r13;
+ OCB_INPUT(0, %r10, %xmm0);
+ vmovdqu %xmm0, (15 * 16)(%rax);
+ OCB_INPUT(1, %r11, %xmm14);
+ OCB_INPUT(2, %r12, %xmm13);
+ OCB_INPUT(3, %r13, %xmm12);
+ movq (4 * 8)(%r8), %r10;
+ movq (5 * 8)(%r8), %r11;
+ movq (6 * 8)(%r8), %r12;
+ movq (7 * 8)(%r8), %r13;
+ OCB_INPUT(4, %r10, %xmm11);
+ OCB_INPUT(5, %r11, %xmm10);
+ OCB_INPUT(6, %r12, %xmm9);
+ OCB_INPUT(7, %r13, %xmm8);
+ movq (8 * 8)(%r8), %r10;
+ movq (9 * 8)(%r8), %r11;
+ movq (10 * 8)(%r8), %r12;
+ movq (11 * 8)(%r8), %r13;
+ OCB_INPUT(8, %r10, %xmm7);
+ OCB_INPUT(9, %r11, %xmm6);
+ OCB_INPUT(10, %r12, %xmm5);
+ OCB_INPUT(11, %r13, %xmm4);
+ movq (12 * 8)(%r8), %r10;
+ movq (13 * 8)(%r8), %r11;
+ movq (14 * 8)(%r8), %r12;
+ movq (15 * 8)(%r8), %r13;
+ OCB_INPUT(12, %r10, %xmm3);
+ OCB_INPUT(13, %r11, %xmm2);
+ OCB_INPUT(14, %r12, %xmm1);
+ OCB_INPUT(15, %r13, %xmm0);
+#undef OCB_INPUT
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %r10d;
+ cmovel %r10d, %r8d; /* max */
+
+ vmovdqu %xmm15, (%rdx);
+
+ movq %rcx, %r10;
+
+ /* inpack16_pre: */
+ vmovq (key_table)(CTX), %xmm15;
+ vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
+ vpxor %xmm0, %xmm15, %xmm0;
+ vpxor %xmm1, %xmm15, %xmm1;
+ vpxor %xmm2, %xmm15, %xmm2;
+ vpxor %xmm3, %xmm15, %xmm3;
+ vpxor %xmm4, %xmm15, %xmm4;
+ vpxor %xmm5, %xmm15, %xmm5;
+ vpxor %xmm6, %xmm15, %xmm6;
+ vpxor %xmm7, %xmm15, %xmm7;
+ vpxor %xmm8, %xmm15, %xmm8;
+ vpxor %xmm9, %xmm15, %xmm9;
+ vpxor %xmm10, %xmm15, %xmm10;
+ vpxor %xmm11, %xmm15, %xmm11;
+ vpxor %xmm12, %xmm15, %xmm12;
+ vpxor %xmm13, %xmm15, %xmm13;
+ vpxor %xmm14, %xmm15, %xmm14;
+ vpxor 15 * 16(%rax), %xmm15, %xmm15;
+
+ call __camellia_enc_blk16;
+
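+ /* Fold the 16 encrypted blocks together with a log2-depth xor tree,
+ * then xor the result into the running checksum (Sum) at (%r10). */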
+ vpxor %xmm7, %xmm6, %xmm6;
+ vpxor %xmm5, %xmm4, %xmm4;
+ vpxor %xmm3, %xmm2, %xmm2;
+ vpxor %xmm1, %xmm0, %xmm0;
+ vpxor %xmm15, %xmm14, %xmm14;
+ vpxor %xmm13, %xmm12, %xmm12;
+ vpxor %xmm11, %xmm10, %xmm10;
+ vpxor %xmm9, %xmm8, %xmm8;
+
+ vpxor %xmm6, %xmm4, %xmm4;
+ vpxor %xmm2, %xmm0, %xmm0;
+ vpxor %xmm14, %xmm12, %xmm12;
+ vpxor %xmm10, %xmm8, %xmm8;
+
+ vpxor %xmm4, %xmm0, %xmm0;
+ vpxor %xmm12, %xmm8, %xmm8;
+
+ vpxor %xmm0, %xmm8, %xmm0;
+ vpxor (%r10), %xmm0, %xmm0;
+ vmovdqu %xmm0, (%r10);
+
+ vzeroall;
+
+ movq (16 * 16 + 0 * 8)(%rsp), %r10;
+ movq (16 * 16 + 1 * 8)(%rsp), %r11;
+ movq (16 * 16 + 2 * 8)(%rsp), %r12;
+ movq (16 * 16 + 3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;)
+
+/*
+ * IN:
+ * ab: 64-bit AB state
+ * cd: 64-bit CD state
+ */
+#define camellia_f(ab, x, t0, t1, t2, t3, t4, inv_shift_row, sbox4mask, \
+ _0f0f0f0fmask, pre_s1lo_mask, pre_s1hi_mask, key) \
+ vmovq key, t0; \
+ vpxor x, x, t3; \
+ \
+ vpxor ab, t0, x; \
+ \
+ /* \
+ * S-function with AES subbytes \
+ */ \
+ \
+ /* input rotation for sbox4 (<<< 1) */ \
+ vpand x, sbox4mask, t0; \
+ vpandn x, sbox4mask, x; \
+ vpaddw t0, t0, t1; \
+ vpsrlw $7, t0, t0; \
+ vpor t0, t1, t0; \
+ vpand sbox4mask, t0, t0; \
+ vpor t0, x, x; \
+ \
+ vmovdqa .Lpost_tf_lo_s1 rRIP, t0; \
+ vmovdqa .Lpost_tf_hi_s1 rRIP, t1; \
+ \
+ /* prefilter sboxes */ \
+ filter_8bit(x, pre_s1lo_mask, pre_s1hi_mask, _0f0f0f0fmask, t2); \
+ \
+ /* AES subbytes + AES shift rows + AES inv shift rows */ \
+ vaesenclast t3, x, x; \
+ \
+ /* postfilter sboxes */ \
+ filter_8bit(x, t0, t1, _0f0f0f0fmask, t2); \
+ \
+ /* output rotation for sbox2 (<<< 1) */ \
+ /* output rotation for sbox3 (>>> 1) */ \
+ vpshufb inv_shift_row, x, t1; \
+ vpshufb .Lsp0044440444044404mask rRIP, x, t4; \
+ vpshufb .Lsp1110111010011110mask rRIP, x, x; \
+ vpaddb t1, t1, t2; \
+ vpsrlw $7, t1, t0; \
+ vpsllw $7, t1, t3; \
+ vpor t0, t2, t0; \
+ vpsrlw $1, t1, t1; \
+ vpshufb .Lsp0222022222000222mask rRIP, t0, t0; \
+ vpor t1, t3, t1; \
+ \
+ vpxor x, t4, t4; \
+ vpshufb .Lsp3033303303303033mask rRIP, t1, t1; \
+ vpxor t4, t0, t0; \
+ vpxor t1, t0, t0; \
+ vpsrldq $8, t0, x; \
+ vpxor t0, x, x;
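+/*
+ * camellia_f evaluates one 64-bit Camellia F-function in XMM registers:
+ * the subkey is xored in, sbox4's input rotation is applied, the
+ * prefilter maps each byte into the AES S-box domain so a single
+ * AESENCLAST against a zeroed round key performs SubBytes (its ShiftRows
+ * is undone by the .Linv_shift_row_and_unpcklbw shuffle), the postfilter
+ * maps back, and the remaining shuffles/shifts apply the sbox2/sbox3
+ * output rotations and the P-function byte spread before folding the
+ * result down to 64 bits.
+ */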
+
+#define vec_rol128(in, out, nrol, t0) \
+ vpshufd $0x4e, in, out; \
+ vpsllq $(nrol), in, t0; \
+ vpsrlq $(64-(nrol)), out, out; \
+ vpaddd t0, out, out;
+
+#define vec_ror128(in, out, nror, t0) \
+ vpshufd $0x4e, in, out; \
+ vpsrlq $(nror), in, t0; \
+ vpsllq $(64-(nror)), out, out; \
+ vpaddd t0, out, out;
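+/*
+ * For 0 < n < 64 these build a full 128-bit rotate from 64-bit lane ops:
+ * the qword swap (vpshufd $0x4e) plus per-qword shifts produce two
+ * non-overlapping halves, so the closing vpaddd acts as an OR, e.g.
+ *   rol128(in, n): hi' = (hi << n) | (lo >> (64-n)),
+ *                  lo' = (lo << n) | (hi >> (64-n)).
+ */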
+
+
+.align 16
+.Linv_shift_row_and_unpcklbw:
+ .byte 0x00, 0xff, 0x0d, 0xff, 0x0a, 0xff, 0x07, 0xff
+ .byte 0x04, 0xff, 0x01, 0xff, 0x0e, 0xff, 0x0b, 0xff
+.Lsp0044440444044404mask:
+ .long 0xffff0404, 0x0404ff04;
+ .long 0x0d0dff0d, 0x0d0dff0d;
+.Lsp1110111010011110mask:
+ .long 0x000000ff, 0x000000ff;
+ .long 0x0bffff0b, 0x0b0b0bff;
+.Lsp0222022222000222mask:
+ .long 0xff060606, 0xff060606;
+ .long 0x0c0cffff, 0xff0c0c0c;
+.Lsp3033303303303033mask:
+ .long 0x04ff0404, 0x04ff0404;
+ .long 0xff0a0aff, 0x0aff0a0a;
+.Lsbox4_input_mask:
+ .byte 0x00, 0xff, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00;
+.Lsigma1:
+ .long 0x3BCC908B, 0xA09E667F;
+.Lsigma2:
+ .long 0x4CAA73B2, 0xB67AE858;
+.Lsigma3:
+ .long 0xE94F82BE, 0xC6EF372F;
+.Lsigma4:
+ .long 0xF1D36F1C, 0x54FF53A5;
+.Lsigma5:
+ .long 0xDE682D1D, 0x10E527FA;
+.Lsigma6:
+ .long 0xB3E6C1FD, 0xB05688C2;
+
+
+.align 8
+ELF(.type __camellia_avx_setup128,@function;)
+__camellia_avx_setup128:
+ /* input:
+ * %rdi: ctx, CTX; subkey storage at key_table(CTX)
+ * %xmm0: key
+ */
+ CFI_STARTPROC();
+
+#define cmll_sub(n, ctx) (key_table+((n)*8))(ctx)
+#define KL128 %xmm0
+#define KA128 %xmm2
+
+ vpshufb .Lbswap128_mask rRIP, KL128, KL128;
+
+ vmovdqa .Linv_shift_row_and_unpcklbw rRIP, %xmm11;
+ vmovq .Lsbox4_input_mask rRIP, %xmm12;
+ vbroadcastss .L0f0f0f0f rRIP, %xmm13;
+ vmovdqa .Lpre_tf_lo_s1 rRIP, %xmm14;
+ vmovdqa .Lpre_tf_hi_s1 rRIP, %xmm15;
+
+ /*
+ * Generate KA
+ */
+ vpsrldq $8, KL128, %xmm2;
+ vmovdqa KL128, %xmm3;
+ vpslldq $8, %xmm3, %xmm3;
+ vpsrldq $8, %xmm3, %xmm3;
+
+ camellia_f(%xmm2, %xmm4, %xmm1,
+ %xmm5, %xmm6, %xmm7, %xmm8,
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 rRIP);
+ vpxor %xmm4, %xmm3, %xmm3;
+ camellia_f(%xmm3, %xmm2, %xmm1,
+ %xmm5, %xmm6, %xmm7, %xmm8,
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 rRIP);
+ camellia_f(%xmm2, %xmm3, %xmm1,
+ %xmm5, %xmm6, %xmm7, %xmm8,
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 rRIP);
+ vpxor %xmm4, %xmm3, %xmm3;
+ camellia_f(%xmm3, %xmm4, %xmm1,
+ %xmm5, %xmm6, %xmm7, %xmm8,
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 rRIP);
+
+ vpslldq $8, %xmm3, %xmm3;
+ vpxor %xmm4, %xmm2, %xmm2;
+ vpsrldq $8, %xmm3, %xmm3;
+ vpslldq $8, %xmm2, KA128;
+ vpor %xmm3, KA128, KA128;
+
+ /*
+ * Generate subkeys
+ */
+ vmovdqu KA128, cmll_sub(24, CTX);
+ vec_rol128(KL128, %xmm3, 15, %xmm15);
+ vec_rol128(KA128, %xmm4, 15, %xmm15);
+ vec_rol128(KA128, %xmm5, 30, %xmm15);
+ vec_rol128(KL128, %xmm6, 45, %xmm15);
+ vec_rol128(KA128, %xmm7, 45, %xmm15);
+ vec_rol128(KL128, %xmm8, 60, %xmm15);
+ vec_rol128(KA128, %xmm9, 60, %xmm15);
+ vec_ror128(KL128, %xmm10, 128-77, %xmm15);
+
+ /* absorb kw2 to other subkeys */
+ vpslldq $8, KL128, %xmm15;
+ vpsrldq $8, %xmm15, %xmm15;
+ vpxor %xmm15, KA128, KA128;
+ vpxor %xmm15, %xmm3, %xmm3;
+ vpxor %xmm15, %xmm4, %xmm4;
+
+ /* subl(1) ^= subr(1) & ~subr(9); */
+ vpandn %xmm15, %xmm5, %xmm13;
+ vpslldq $12, %xmm13, %xmm13;
+ vpsrldq $8, %xmm13, %xmm13;
+ vpxor %xmm13, %xmm15, %xmm15;
+ /* dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); */
+ vpand %xmm15, %xmm5, %xmm14;
+ vpslld $1, %xmm14, %xmm11;
+ vpsrld $31, %xmm14, %xmm14;
+ vpaddd %xmm11, %xmm14, %xmm14;
+ vpslldq $8, %xmm14, %xmm14;
+ vpsrldq $12, %xmm14, %xmm14;
+ vpxor %xmm14, %xmm15, %xmm15;
+
+ vpxor %xmm15, %xmm6, %xmm6;
+ vpxor %xmm15, %xmm8, %xmm8;
+ vpxor %xmm15, %xmm9, %xmm9;
+
+ /* subl(1) ^= subr(1) & ~subr(17); */
+ vpandn %xmm15, %xmm10, %xmm13;
+ vpslldq $12, %xmm13, %xmm13;
+ vpsrldq $8, %xmm13, %xmm13;
+ vpxor %xmm13, %xmm15, %xmm15;
+ /* dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); */
+ vpand %xmm15, %xmm10, %xmm14;
+ vpslld $1, %xmm14, %xmm11;
+ vpsrld $31, %xmm14, %xmm14;
+ vpaddd %xmm11, %xmm14, %xmm14;
+ vpslldq $8, %xmm14, %xmm14;
+ vpsrldq $12, %xmm14, %xmm14;
+ vpxor %xmm14, %xmm15, %xmm15;
+
+ vpshufd $0x1b, KL128, KL128;
+ vpshufd $0x1b, KA128, KA128;
+ vpshufd $0x1b, %xmm3, %xmm3;
+ vpshufd $0x1b, %xmm4, %xmm4;
+ vpshufd $0x1b, %xmm5, %xmm5;
+ vpshufd $0x1b, %xmm6, %xmm6;
+ vpshufd $0x1b, %xmm7, %xmm7;
+ vpshufd $0x1b, %xmm8, %xmm8;
+ vpshufd $0x1b, %xmm9, %xmm9;
+ vpshufd $0x1b, %xmm10, %xmm10;
+
+ vmovdqu KL128, cmll_sub(0, CTX);
+ vpshufd $0x1b, KL128, KL128;
+ vmovdqu KA128, cmll_sub(2, CTX);
+ vmovdqu %xmm3, cmll_sub(4, CTX);
+ vmovdqu %xmm4, cmll_sub(6, CTX);
+ vmovdqu %xmm5, cmll_sub(8, CTX);
+ vmovdqu %xmm6, cmll_sub(10, CTX);
+ vpsrldq $8, %xmm8, %xmm8;
+ vmovq %xmm7, cmll_sub(12, CTX);
+ vmovq %xmm8, cmll_sub(13, CTX);
+ vmovdqu %xmm9, cmll_sub(14, CTX);
+ vmovdqu %xmm10, cmll_sub(16, CTX);
+
+ vmovdqu cmll_sub(24, CTX), KA128;
+
+ vec_ror128(KL128, %xmm3, 128 - 94, %xmm7);
+ vec_ror128(KA128, %xmm4, 128 - 94, %xmm7);
+ vec_ror128(KL128, %xmm5, 128 - 111, %xmm7);
+ vec_ror128(KA128, %xmm6, 128 - 111, %xmm7);
+
+ vpxor %xmm15, %xmm3, %xmm3;
+ vpxor %xmm15, %xmm4, %xmm4;
+ vpxor %xmm15, %xmm5, %xmm5;
+ vpslldq $8, %xmm15, %xmm15;
+ vpxor %xmm15, %xmm6, %xmm6;
+
+ /* absorb kw4 to other subkeys */
+ vpslldq $8, %xmm6, %xmm15;
+ vpxor %xmm15, %xmm5, %xmm5;
+ vpxor %xmm15, %xmm4, %xmm4;
+ vpxor %xmm15, %xmm3, %xmm3;
+
+ /* subl(25) ^= subr(25) & ~subr(16); */
+ vpshufd $0x1b, cmll_sub(16, CTX), %xmm10;
+ vpandn %xmm15, %xmm10, %xmm13;
+ vpslldq $4, %xmm13, %xmm13;
+ vpxor %xmm13, %xmm15, %xmm15;
+ /* dw = subl(25) & subl(16), subr(25) ^= CAMELLIA_RL1(dw); */
+ vpand %xmm15, %xmm10, %xmm14;
+ vpslld $1, %xmm14, %xmm11;
+ vpsrld $31, %xmm14, %xmm14;
+ vpaddd %xmm11, %xmm14, %xmm14;
+ vpsrldq $12, %xmm14, %xmm14;
+ vpslldq $8, %xmm14, %xmm14;
+ vpxor %xmm14, %xmm15, %xmm15;
+
+ vpshufd $0x1b, %xmm3, %xmm3;
+ vpshufd $0x1b, %xmm4, %xmm4;
+ vpshufd $0x1b, %xmm5, %xmm5;
+ vpshufd $0x1b, %xmm6, %xmm6;
+
+ vmovdqu %xmm3, cmll_sub(18, CTX);
+ vmovdqu %xmm4, cmll_sub(20, CTX);
+ vmovdqu %xmm5, cmll_sub(22, CTX);
+ vmovdqu %xmm6, cmll_sub(24, CTX);
+
+ vpshufd $0x1b, cmll_sub(14, CTX), %xmm3;
+ vpshufd $0x1b, cmll_sub(12, CTX), %xmm4;
+ vpshufd $0x1b, cmll_sub(10, CTX), %xmm5;
+ vpshufd $0x1b, cmll_sub(8, CTX), %xmm6;
+
+ vpxor %xmm15, %xmm3, %xmm3;
+ vpxor %xmm15, %xmm4, %xmm4;
+ vpxor %xmm15, %xmm5, %xmm5;
+
+ /* subl(25) ^= subr(25) & ~subr(8); */
+ vpandn %xmm15, %xmm6, %xmm13;
+ vpslldq $4, %xmm13, %xmm13;
+ vpxor %xmm13, %xmm15, %xmm15;
+ /* dw = subl(25) & subl(8), subr(25) ^= CAMELLIA_RL1(dw); */
+ vpand %xmm15, %xmm6, %xmm14;
+ vpslld $1, %xmm14, %xmm11;
+ vpsrld $31, %xmm14, %xmm14;
+ vpaddd %xmm11, %xmm14, %xmm14;
+ vpsrldq $12, %xmm14, %xmm14;
+ vpslldq $8, %xmm14, %xmm14;
+ vpxor %xmm14, %xmm15, %xmm15;
+
+ vpshufd $0x1b, %xmm3, %xmm3;
+ vpshufd $0x1b, %xmm4, %xmm4;
+ vpshufd $0x1b, %xmm5, %xmm5;
+
+ vmovdqu %xmm3, cmll_sub(14, CTX);
+ vmovdqu %xmm4, cmll_sub(12, CTX);
+ vmovdqu %xmm5, cmll_sub(10, CTX);
+
+ vpshufd $0x1b, cmll_sub(6, CTX), %xmm6;
+ vpshufd $0x1b, cmll_sub(4, CTX), %xmm4;
+ vpshufd $0x1b, cmll_sub(2, CTX), %xmm2;
+ vpshufd $0x1b, cmll_sub(0, CTX), %xmm0;
+
+ vpxor %xmm15, %xmm6, %xmm6;
+ vpxor %xmm15, %xmm4, %xmm4;
+ vpxor %xmm15, %xmm2, %xmm2;
+ vpxor %xmm15, %xmm0, %xmm0;
+
+ vpshufd $0x1b, %xmm6, %xmm6;
+ vpshufd $0x1b, %xmm4, %xmm4;
+ vpshufd $0x1b, %xmm2, %xmm2;
+ vpshufd $0x1b, %xmm0, %xmm0;
+
+ vpsrldq $8, %xmm2, %xmm3;
+ vpsrldq $8, %xmm4, %xmm5;
+ vpsrldq $8, %xmm6, %xmm7;
+
+ /*
+ * key XOR is end of F-function.
+ */
+ vpxor %xmm2, %xmm0, %xmm0;
+ vpxor %xmm4, %xmm2, %xmm2;
+
+ vmovq %xmm0, cmll_sub(0, CTX);
+ vmovq %xmm3, cmll_sub(2, CTX);
+ vpxor %xmm5, %xmm3, %xmm3;
+ vpxor %xmm6, %xmm4, %xmm4;
+ vpxor %xmm7, %xmm5, %xmm5;
+ vmovq %xmm2, cmll_sub(3, CTX);
+ vmovq %xmm3, cmll_sub(4, CTX);
+ vmovq %xmm4, cmll_sub(5, CTX);
+ vmovq %xmm5, cmll_sub(6, CTX);
+
+ vmovq cmll_sub(7, CTX), %xmm7;
+ vmovq cmll_sub(8, CTX), %xmm8;
+ vmovq cmll_sub(9, CTX), %xmm9;
+ vmovq cmll_sub(10, CTX), %xmm10;
+ /* tl = subl(10) ^ (subr(10) & ~subr(8)); */
+ vpandn %xmm10, %xmm8, %xmm15;
+ vpsrldq $4, %xmm15, %xmm15;
+ vpxor %xmm15, %xmm10, %xmm0;
+ /* dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); */
+ vpand %xmm8, %xmm0, %xmm15;
+ vpslld $1, %xmm15, %xmm14;
+ vpsrld $31, %xmm15, %xmm15;
+ vpaddd %xmm14, %xmm15, %xmm15;
+ vpslldq $12, %xmm15, %xmm15;
+ vpsrldq $8, %xmm15, %xmm15;
+ vpxor %xmm15, %xmm0, %xmm0;
+
+ vpxor %xmm0, %xmm6, %xmm6;
+ vmovq %xmm6, cmll_sub(7, CTX);
+
+ vmovq cmll_sub(11, CTX), %xmm11;
+ vmovq cmll_sub(12, CTX), %xmm12;
+ vmovq cmll_sub(13, CTX), %xmm13;
+ vmovq cmll_sub(14, CTX), %xmm14;
+ vmovq cmll_sub(15, CTX), %xmm15;
+ /* tl = subl(7) ^ (subr(7) & ~subr(9)); */
+ vpandn %xmm7, %xmm9, %xmm1;
+ vpsrldq $4, %xmm1, %xmm1;
+ vpxor %xmm1, %xmm7, %xmm0;
+ /* dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); */
+ vpand %xmm9, %xmm0, %xmm1;
+ vpslld $1, %xmm1, %xmm2;
+ vpsrld $31, %xmm1, %xmm1;
+ vpaddd %xmm2, %xmm1, %xmm1;
+ vpslldq $12, %xmm1, %xmm1;
+ vpsrldq $8, %xmm1, %xmm1;
+ vpxor %xmm1, %xmm0, %xmm0;
+
+ vpxor %xmm11, %xmm0, %xmm0;
+ vpxor %xmm12, %xmm10, %xmm10;
+ vpxor %xmm13, %xmm11, %xmm11;
+ vpxor %xmm14, %xmm12, %xmm12;
+ vpxor %xmm15, %xmm13, %xmm13;
+ vmovq %xmm0, cmll_sub(10, CTX);
+ vmovq %xmm10, cmll_sub(11, CTX);
+ vmovq %xmm11, cmll_sub(12, CTX);
+ vmovq %xmm12, cmll_sub(13, CTX);
+ vmovq %xmm13, cmll_sub(14, CTX);
+
+ vmovq cmll_sub(16, CTX), %xmm6;
+ vmovq cmll_sub(17, CTX), %xmm7;
+ vmovq cmll_sub(18, CTX), %xmm8;
+ vmovq cmll_sub(19, CTX), %xmm9;
+ vmovq cmll_sub(20, CTX), %xmm10;
+ /* tl = subl(18) ^ (subr(18) & ~subr(16)); */
+ vpandn %xmm8, %xmm6, %xmm1;
+ vpsrldq $4, %xmm1, %xmm1;
+ vpxor %xmm1, %xmm8, %xmm0;
+ /* dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); */
+ vpand %xmm6, %xmm0, %xmm1;
+ vpslld $1, %xmm1, %xmm2;
+ vpsrld $31, %xmm1, %xmm1;
+ vpaddd %xmm2, %xmm1, %xmm1;
+ vpslldq $12, %xmm1, %xmm1;
+ vpsrldq $8, %xmm1, %xmm1;
+ vpxor %xmm1, %xmm0, %xmm0;
+
+ vpxor %xmm14, %xmm0, %xmm0;
+ vmovq %xmm0, cmll_sub(15, CTX);
+
+ /* tl = subl(15) ^ (subr(15) & ~subr(17)); */
+ vpandn %xmm15, %xmm7, %xmm1;
+ vpsrldq $4, %xmm1, %xmm1;
+ vpxor %xmm1, %xmm15, %xmm0;
+ /* dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); */
+ vpand %xmm7, %xmm0, %xmm1;
+ vpslld $1, %xmm1, %xmm2;
+ vpsrld $31, %xmm1, %xmm1;
+ vpaddd %xmm2, %xmm1, %xmm1;
+ vpslldq $12, %xmm1, %xmm1;
+ vpsrldq $8, %xmm1, %xmm1;
+ vpxor %xmm1, %xmm0, %xmm0;
+
+ vmovq cmll_sub(21, CTX), %xmm1;
+ vmovq cmll_sub(22, CTX), %xmm2;
+ vmovq cmll_sub(23, CTX), %xmm3;
+ vmovq cmll_sub(24, CTX), %xmm4;
+
+ vpxor %xmm9, %xmm0, %xmm0;
+ vpxor %xmm10, %xmm8, %xmm8;
+ vpxor %xmm1, %xmm9, %xmm9;
+ vpxor %xmm2, %xmm10, %xmm10;
+ vpxor %xmm3, %xmm1, %xmm1;
+ vpxor %xmm4, %xmm3, %xmm3;
+
+ vmovq %xmm0, cmll_sub(18, CTX);
+ vmovq %xmm8, cmll_sub(19, CTX);
+ vmovq %xmm9, cmll_sub(20, CTX);
+ vmovq %xmm10, cmll_sub(21, CTX);
+ vmovq %xmm1, cmll_sub(22, CTX);
+ vmovq %xmm2, cmll_sub(23, CTX);
+ vmovq %xmm3, cmll_sub(24, CTX);
+
+ /* kw2 and kw4 are unused now. */
+ movq $0, cmll_sub(1, CTX);
+ movq $0, cmll_sub(25, CTX);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __camellia_avx_setup128,.-__camellia_avx_setup128;)
+
+.align 8
+ELF(.type __camellia_avx_setup256,@function;)
+
+__camellia_avx_setup256:
+ /* input:
+ * %rdi: ctx, CTX; subkey storage at key_table(CTX)
+ * %xmm0 & %xmm1: key
+ */
+ CFI_STARTPROC();
+
+#define KL128 %xmm0
+#define KR128 %xmm1
+#define KA128 %xmm2
+#define KB128 %xmm3
+
+ vpshufb .Lbswap128_mask rRIP, KL128, KL128;
+ vpshufb .Lbswap128_mask rRIP, KR128, KR128;
+
+ vmovdqa .Linv_shift_row_and_unpcklbw rRIP, %xmm11;
+ vmovq .Lsbox4_input_mask rRIP, %xmm12;
+ vbroadcastss .L0f0f0f0f rRIP, %xmm13;
+ vmovdqa .Lpre_tf_lo_s1 rRIP, %xmm14;
+ vmovdqa .Lpre_tf_hi_s1 rRIP, %xmm15;
+
+ /*
+ * Generate KA
+ */
+ vpxor KL128, KR128, %xmm3;
+ vpsrldq $8, KR128, %xmm6;
+ vpsrldq $8, %xmm3, %xmm2;
+ vpslldq $8, %xmm3, %xmm3;
+ vpsrldq $8, %xmm3, %xmm3;
+
+ camellia_f(%xmm2, %xmm4, %xmm5,
+ %xmm7, %xmm8, %xmm9, %xmm10,
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 rRIP);
+ vpxor %xmm4, %xmm3, %xmm3;
+ camellia_f(%xmm3, %xmm2, %xmm5,
+ %xmm7, %xmm8, %xmm9, %xmm10,
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 rRIP);
+ vpxor %xmm6, %xmm2, %xmm2;
+ camellia_f(%xmm2, %xmm3, %xmm5,
+ %xmm7, %xmm8, %xmm9, %xmm10,
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 rRIP);
+ vpxor %xmm4, %xmm3, %xmm3;
+ vpxor KR128, %xmm3, %xmm3;
+ camellia_f(%xmm3, %xmm4, %xmm5,
+ %xmm7, %xmm8, %xmm9, %xmm10,
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 rRIP);
+
+ vpslldq $8, %xmm3, %xmm3;
+ vpxor %xmm4, %xmm2, %xmm2;
+ vpsrldq $8, %xmm3, %xmm3;
+ vpslldq $8, %xmm2, KA128;
+ vpor %xmm3, KA128, KA128;
+
+ /*
+ * Generate KB
+ */
+ vpxor KA128, KR128, %xmm3;
+ vpsrldq $8, %xmm3, %xmm4;
+ vpslldq $8, %xmm3, %xmm3;
+ vpsrldq $8, %xmm3, %xmm3;
+
+ camellia_f(%xmm4, %xmm5, %xmm6,
+ %xmm7, %xmm8, %xmm9, %xmm10,
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma5 rRIP);
+ vpxor %xmm5, %xmm3, %xmm3;
+
+ camellia_f(%xmm3, %xmm5, %xmm6,
+ %xmm7, %xmm8, %xmm9, %xmm10,
+ %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma6 rRIP);
+ vpslldq $8, %xmm3, %xmm3;
+ vpxor %xmm5, %xmm4, %xmm4;
+ vpsrldq $8, %xmm3, %xmm3;
+ vpslldq $8, %xmm4, %xmm4;
+ vpor %xmm3, %xmm4, KB128;
+
+ /*
+ * Generate subkeys
+ */
+ vmovdqu KB128, cmll_sub(32, CTX);
+ vec_rol128(KR128, %xmm4, 15, %xmm15);
+ vec_rol128(KA128, %xmm5, 15, %xmm15);
+ vec_rol128(KR128, %xmm6, 30, %xmm15);
+ vec_rol128(KB128, %xmm7, 30, %xmm15);
+ vec_rol128(KL128, %xmm8, 45, %xmm15);
+ vec_rol128(KA128, %xmm9, 45, %xmm15);
+ vec_rol128(KL128, %xmm10, 60, %xmm15);
+ vec_rol128(KR128, %xmm11, 60, %xmm15);
+ vec_rol128(KB128, %xmm12, 60, %xmm15);
+
+ /* absorb kw2 to other subkeys */
+ vpslldq $8, KL128, %xmm15;
+ vpsrldq $8, %xmm15, %xmm15;
+ vpxor %xmm15, KB128, KB128;
+ vpxor %xmm15, %xmm4, %xmm4;
+ vpxor %xmm15, %xmm5, %xmm5;
+
+ /* subl(1) ^= subr(1) & ~subr(9); */
+ vpandn %xmm15, %xmm6, %xmm13;
+ vpslldq $12, %xmm13, %xmm13;
+ vpsrldq $8, %xmm13, %xmm13;
+ vpxor %xmm13, %xmm15, %xmm15;
+ /* dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); */
+ vpand %xmm15, %xmm6, %xmm14;
+ vpslld $1, %xmm14, %xmm13;
+ vpsrld $31, %xmm14, %xmm14;
+ vpaddd %xmm13, %xmm14, %xmm14;
+ vpslldq $8, %xmm14, %xmm14;
+ vpsrldq $12, %xmm14, %xmm14;
+ vpxor %xmm14, %xmm15, %xmm15;
+
+ vpxor %xmm15, %xmm7, %xmm7;
+ vpxor %xmm15, %xmm8, %xmm8;
+ vpxor %xmm15, %xmm9, %xmm9;
+
+ vpshufd $0x1b, KL128, KL128;
+ vpshufd $0x1b, KB128, KB128;
+ vpshufd $0x1b, %xmm4, %xmm4;
+ vpshufd $0x1b, %xmm5, %xmm5;
+ vpshufd $0x1b, %xmm6, %xmm6;
+ vpshufd $0x1b, %xmm7, %xmm7;
+ vpshufd $0x1b, %xmm8, %xmm8;
+ vpshufd $0x1b, %xmm9, %xmm9;
+
+ vmovdqu KL128, cmll_sub(0, CTX);
+ vpshufd $0x1b, KL128, KL128;
+ vmovdqu KB128, cmll_sub(2, CTX);
+ vmovdqu %xmm4, cmll_sub(4, CTX);
+ vmovdqu %xmm5, cmll_sub(6, CTX);
+ vmovdqu %xmm6, cmll_sub(8, CTX);
+ vmovdqu %xmm7, cmll_sub(10, CTX);
+ vmovdqu %xmm8, cmll_sub(12, CTX);
+ vmovdqu %xmm9, cmll_sub(14, CTX);
+
+ vmovdqu cmll_sub(32, CTX), KB128;
+
+ /* subl(1) ^= subr(1) & ~subr(17); */
+ vpandn %xmm15, %xmm10, %xmm13;
+ vpslldq $12, %xmm13, %xmm13;
+ vpsrldq $8, %xmm13, %xmm13;
+ vpxor %xmm13, %xmm15, %xmm15;
+ /* dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); */
+ vpand %xmm15, %xmm10, %xmm14;
+ vpslld $1, %xmm14, %xmm13;
+ vpsrld $31, %xmm14, %xmm14;
+ vpaddd %xmm13, %xmm14, %xmm14;
+ vpslldq $8, %xmm14, %xmm14;
+ vpsrldq $12, %xmm14, %xmm14;
+ vpxor %xmm14, %xmm15, %xmm15;
+
+ vpxor %xmm15, %xmm11, %xmm11;
+ vpxor %xmm15, %xmm12, %xmm12;
+
+ vec_ror128(KL128, %xmm4, 128-77, %xmm14);
+ vec_ror128(KA128, %xmm5, 128-77, %xmm14);
+ vec_ror128(KR128, %xmm6, 128-94, %xmm14);
+ vec_ror128(KA128, %xmm7, 128-94, %xmm14);
+ vec_ror128(KL128, %xmm8, 128-111, %xmm14);
+ vec_ror128(KB128, %xmm9, 128-111, %xmm14);
+
+ vpxor %xmm15, %xmm4, %xmm4;
+
+ vpshufd $0x1b, %xmm10, %xmm10;
+ vpshufd $0x1b, %xmm11, %xmm11;
+ vpshufd $0x1b, %xmm12, %xmm12;
+ vpshufd $0x1b, %xmm4, %xmm4;
+
+ vmovdqu %xmm10, cmll_sub(16, CTX);
+ vmovdqu %xmm11, cmll_sub(18, CTX);
+ vmovdqu %xmm12, cmll_sub(20, CTX);
+ vmovdqu %xmm4, cmll_sub(22, CTX);
+
+ /* subl(1) ^= subr(1) & ~subr(25); */
+ vpandn %xmm15, %xmm5, %xmm13;
+ vpslldq $12, %xmm13, %xmm13;
+ vpsrldq $8, %xmm13, %xmm13;
+ vpxor %xmm13, %xmm15, %xmm15;
+ /* dw = subl(1) & subl(25), subr(1) ^= CAMELLIA_RL1(dw); */
+ vpand %xmm15, %xmm5, %xmm14;
+ vpslld $1, %xmm14, %xmm13;
+ vpsrld $31, %xmm14, %xmm14;
+ vpaddd %xmm13, %xmm14, %xmm14;
+ vpslldq $8, %xmm14, %xmm14;
+ vpsrldq $12, %xmm14, %xmm14;
+ vpxor %xmm14, %xmm15, %xmm15;
+
+ vpxor %xmm15, %xmm6, %xmm6;
+ vpxor %xmm15, %xmm7, %xmm7;
+ vpxor %xmm15, %xmm8, %xmm8;
+ vpslldq $8, %xmm15, %xmm15;
+ vpxor %xmm15, %xmm9, %xmm9;
+
+ /* absorb kw4 to other subkeys */
+ vpslldq $8, %xmm9, %xmm15;
+ vpxor %xmm15, %xmm8, %xmm8;
+ vpxor %xmm15, %xmm7, %xmm7;
+ vpxor %xmm15, %xmm6, %xmm6;
+
+ /* subl(33) ^= subr(33) & ~subr(24); */
+ vpandn %xmm15, %xmm5, %xmm14;
+ vpslldq $4, %xmm14, %xmm14;
+ vpxor %xmm14, %xmm15, %xmm15;
+ /* dw = subl(33) & subl(24), subr(33) ^= CAMELLIA_RL1(dw); */
+ vpand %xmm15, %xmm5, %xmm14;
+ vpslld $1, %xmm14, %xmm13;
+ vpsrld $31, %xmm14, %xmm14;
+ vpaddd %xmm13, %xmm14, %xmm14;
+ vpsrldq $12, %xmm14, %xmm14;
+ vpslldq $8, %xmm14, %xmm14;
+ vpxor %xmm14, %xmm15, %xmm15;
+
+ vpshufd $0x1b, %xmm5, %xmm5;
+ vpshufd $0x1b, %xmm6, %xmm6;
+ vpshufd $0x1b, %xmm7, %xmm7;
+ vpshufd $0x1b, %xmm8, %xmm8;
+ vpshufd $0x1b, %xmm9, %xmm9;
+
+ vmovdqu %xmm5, cmll_sub(24, CTX);
+ vmovdqu %xmm6, cmll_sub(26, CTX);
+ vmovdqu %xmm7, cmll_sub(28, CTX);
+ vmovdqu %xmm8, cmll_sub(30, CTX);
+ vmovdqu %xmm9, cmll_sub(32, CTX);
+
+ vpshufd $0x1b, cmll_sub(22, CTX), %xmm0;
+ vpshufd $0x1b, cmll_sub(20, CTX), %xmm1;
+ vpshufd $0x1b, cmll_sub(18, CTX), %xmm2;
+ vpshufd $0x1b, cmll_sub(16, CTX), %xmm3;
+ vpshufd $0x1b, cmll_sub(14, CTX), %xmm4;
+ vpshufd $0x1b, cmll_sub(12, CTX), %xmm5;
+ vpshufd $0x1b, cmll_sub(10, CTX), %xmm6;
+ vpshufd $0x1b, cmll_sub(8, CTX), %xmm7;
+
+ vpxor %xmm15, %xmm0, %xmm0;
+ vpxor %xmm15, %xmm1, %xmm1;
+ vpxor %xmm15, %xmm2, %xmm2;
+
+ /* subl(33) ^= subr(33) & ~subr(16); */
+ vpandn %xmm15, %xmm3, %xmm14;
+ vpslldq $4, %xmm14, %xmm14;
+ vpxor %xmm14, %xmm15, %xmm15;
+ /* dw = subl(33) & subl(16), subr(33) ^= CAMELLIA_RL1(dw); */
+ vpand %xmm15, %xmm3, %xmm14;
+ vpslld $1, %xmm14, %xmm13;
+ vpsrld $31, %xmm14, %xmm14;
+ vpaddd %xmm13, %xmm14, %xmm14;
+ vpsrldq $12, %xmm14, %xmm14;
+ vpslldq $8, %xmm14, %xmm14;
+ vpxor %xmm14, %xmm15, %xmm15;
+
+ vpxor %xmm15, %xmm4, %xmm4;
+ vpxor %xmm15, %xmm5, %xmm5;
+ vpxor %xmm15, %xmm6, %xmm6;
+
+ vpshufd $0x1b, %xmm0, %xmm0;
+ vpshufd $0x1b, %xmm1, %xmm1;
+ vpshufd $0x1b, %xmm2, %xmm2;
+ vpshufd $0x1b, %xmm4, %xmm4;
+ vpshufd $0x1b, %xmm5, %xmm5;
+ vpshufd $0x1b, %xmm6, %xmm6;
+
+ vmovdqu %xmm0, cmll_sub(22, CTX);
+ vmovdqu %xmm1, cmll_sub(20, CTX);
+ vmovdqu %xmm2, cmll_sub(18, CTX);
+ vmovdqu %xmm4, cmll_sub(14, CTX);
+ vmovdqu %xmm5, cmll_sub(12, CTX);
+ vmovdqu %xmm6, cmll_sub(10, CTX);
+
+ vpshufd $0x1b, cmll_sub(6, CTX), %xmm6;
+ vpshufd $0x1b, cmll_sub(4, CTX), %xmm4;
+ vpshufd $0x1b, cmll_sub(2, CTX), %xmm2;
+ vpshufd $0x1b, cmll_sub(0, CTX), %xmm0;
+
+ /* subl(33) ^= subr(33) & ~subr(8); */
+ vpandn %xmm15, %xmm7, %xmm14;
+ vpslldq $4, %xmm14, %xmm14;
+ vpxor %xmm14, %xmm15, %xmm15;
+ /* dw = subl(33) & subl(8), subr(33) ^= CAMELLIA_RL1(dw); */
+ vpand %xmm15, %xmm7, %xmm14;
+ vpslld $1, %xmm14, %xmm13;
+ vpsrld $31, %xmm14, %xmm14;
+ vpaddd %xmm13, %xmm14, %xmm14;
+ vpsrldq $12, %xmm14, %xmm14;
+ vpslldq $8, %xmm14, %xmm14;
+ vpxor %xmm14, %xmm15, %xmm15;
+
+ vpxor %xmm15, %xmm6, %xmm6;
+ vpxor %xmm15, %xmm4, %xmm4;
+ vpxor %xmm15, %xmm2, %xmm2;
+ vpxor %xmm15, %xmm0, %xmm0;
+
+ vpshufd $0x1b, %xmm6, %xmm6;
+ vpshufd $0x1b, %xmm4, %xmm4;
+ vpshufd $0x1b, %xmm2, %xmm2;
+ vpshufd $0x1b, %xmm0, %xmm0;
+
+ vpsrldq $8, %xmm2, %xmm3;
+ vpsrldq $8, %xmm4, %xmm5;
+ vpsrldq $8, %xmm6, %xmm7;
+
+ /*
+ * key XOR is end of F-function.
+ */
+ vpxor %xmm2, %xmm0, %xmm0;
+ vpxor %xmm4, %xmm2, %xmm2;
+
+ vmovq %xmm0, cmll_sub(0, CTX);
+ vmovq %xmm3, cmll_sub(2, CTX);
+ vpxor %xmm5, %xmm3, %xmm3;
+ vpxor %xmm6, %xmm4, %xmm4;
+ vpxor %xmm7, %xmm5, %xmm5;
+ vmovq %xmm2, cmll_sub(3, CTX);
+ vmovq %xmm3, cmll_sub(4, CTX);
+ vmovq %xmm4, cmll_sub(5, CTX);
+ vmovq %xmm5, cmll_sub(6, CTX);
+
+ vmovq cmll_sub(7, CTX), %xmm7;
+ vmovq cmll_sub(8, CTX), %xmm8;
+ vmovq cmll_sub(9, CTX), %xmm9;
+ vmovq cmll_sub(10, CTX), %xmm10;
+ /* tl = subl(10) ^ (subr(10) & ~subr(8)); */
+ vpandn %xmm10, %xmm8, %xmm15;
+ vpsrldq $4, %xmm15, %xmm15;
+ vpxor %xmm15, %xmm10, %xmm0;
+ /* dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); */
+ vpand %xmm8, %xmm0, %xmm15;
+ vpslld $1, %xmm15, %xmm14;
+ vpsrld $31, %xmm15, %xmm15;
+ vpaddd %xmm14, %xmm15, %xmm15;
+ vpslldq $12, %xmm15, %xmm15;
+ vpsrldq $8, %xmm15, %xmm15;
+ vpxor %xmm15, %xmm0, %xmm0;
+
+ vpxor %xmm0, %xmm6, %xmm6;
+ vmovq %xmm6, cmll_sub(7, CTX);
+
+ vmovq cmll_sub(11, CTX), %xmm11;
+ vmovq cmll_sub(12, CTX), %xmm12;
+ vmovq cmll_sub(13, CTX), %xmm13;
+ vmovq cmll_sub(14, CTX), %xmm14;
+ vmovq cmll_sub(15, CTX), %xmm15;
+ /* tl = subl(7) ^ (subr(7) & ~subr(9)); */
+ vpandn %xmm7, %xmm9, %xmm1;
+ vpsrldq $4, %xmm1, %xmm1;
+ vpxor %xmm1, %xmm7, %xmm0;
+ /* dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); */
+ vpand %xmm9, %xmm0, %xmm1;
+ vpslld $1, %xmm1, %xmm2;
+ vpsrld $31, %xmm1, %xmm1;
+ vpaddd %xmm2, %xmm1, %xmm1;
+ vpslldq $12, %xmm1, %xmm1;
+ vpsrldq $8, %xmm1, %xmm1;
+ vpxor %xmm1, %xmm0, %xmm0;
+
+ vpxor %xmm11, %xmm0, %xmm0;
+ vpxor %xmm12, %xmm10, %xmm10;
+ vpxor %xmm13, %xmm11, %xmm11;
+ vpxor %xmm14, %xmm12, %xmm12;
+ vpxor %xmm15, %xmm13, %xmm13;
+ vmovq %xmm0, cmll_sub(10, CTX);
+ vmovq %xmm10, cmll_sub(11, CTX);
+ vmovq %xmm11, cmll_sub(12, CTX);
+ vmovq %xmm12, cmll_sub(13, CTX);
+ vmovq %xmm13, cmll_sub(14, CTX);
+
+ vmovq cmll_sub(16, CTX), %xmm6;
+ vmovq cmll_sub(17, CTX), %xmm7;
+ vmovq cmll_sub(18, CTX), %xmm8;
+ vmovq cmll_sub(19, CTX), %xmm9;
+ vmovq cmll_sub(20, CTX), %xmm10;
+ /* tl = subl(18) ^ (subr(18) & ~subr(16)); */
+ vpandn %xmm8, %xmm6, %xmm1;
+ vpsrldq $4, %xmm1, %xmm1;
+ vpxor %xmm1, %xmm8, %xmm0;
+ /* dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); */
+ vpand %xmm6, %xmm0, %xmm1;
+ vpslld $1, %xmm1, %xmm2;
+ vpsrld $31, %xmm1, %xmm1;
+ vpaddd %xmm2, %xmm1, %xmm1;
+ vpslldq $12, %xmm1, %xmm1;
+ vpsrldq $8, %xmm1, %xmm1;
+ vpxor %xmm1, %xmm0, %xmm0;
+
+ vpxor %xmm14, %xmm0, %xmm0;
+ vmovq %xmm0, cmll_sub(15, CTX);
+
+ /* tl = subl(15) ^ (subr(15) & ~subr(17)); */
+ vpandn %xmm15, %xmm7, %xmm1;
+ vpsrldq $4, %xmm1, %xmm1;
+ vpxor %xmm1, %xmm15, %xmm0;
+ /* dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); */
+ vpand %xmm7, %xmm0, %xmm1;
+ vpslld $1, %xmm1, %xmm2;
+ vpsrld $31, %xmm1, %xmm1;
+ vpaddd %xmm2, %xmm1, %xmm1;
+ vpslldq $12, %xmm1, %xmm1;
+ vpsrldq $8, %xmm1, %xmm1;
+ vpxor %xmm1, %xmm0, %xmm0;
+
+ vmovq cmll_sub(21, CTX), %xmm1;
+ vmovq cmll_sub(22, CTX), %xmm2;
+ vmovq cmll_sub(23, CTX), %xmm3;
+ vmovq cmll_sub(24, CTX), %xmm4;
+
+ vpxor %xmm9, %xmm0, %xmm0;
+ vpxor %xmm10, %xmm8, %xmm8;
+ vpxor %xmm1, %xmm9, %xmm9;
+ vpxor %xmm2, %xmm10, %xmm10;
+ vpxor %xmm3, %xmm1, %xmm1;
+
+ vmovq %xmm0, cmll_sub(18, CTX);
+ vmovq %xmm8, cmll_sub(19, CTX);
+ vmovq %xmm9, cmll_sub(20, CTX);
+ vmovq %xmm10, cmll_sub(21, CTX);
+ vmovq %xmm1, cmll_sub(22, CTX);
+
+ vmovq cmll_sub(25, CTX), %xmm5;
+ vmovq cmll_sub(26, CTX), %xmm6;
+ vmovq cmll_sub(27, CTX), %xmm7;
+ vmovq cmll_sub(28, CTX), %xmm8;
+ vmovq cmll_sub(29, CTX), %xmm9;
+ vmovq cmll_sub(30, CTX), %xmm10;
+ vmovq cmll_sub(31, CTX), %xmm11;
+ vmovq cmll_sub(32, CTX), %xmm12;
+
+ /* tl = subl(26) ^ (subr(26) & ~subr(24)); */
+ vpandn %xmm6, %xmm4, %xmm15;
+ vpsrldq $4, %xmm15, %xmm15;
+ vpxor %xmm15, %xmm6, %xmm0;
+ /* dw = tl & subl(24), tr = subr(26) ^ CAMELLIA_RL1(dw); */
+ vpand %xmm4, %xmm0, %xmm15;
+ vpslld $1, %xmm15, %xmm14;
+ vpsrld $31, %xmm15, %xmm15;
+ vpaddd %xmm14, %xmm15, %xmm15;
+ vpslldq $12, %xmm15, %xmm15;
+ vpsrldq $8, %xmm15, %xmm15;
+ vpxor %xmm15, %xmm0, %xmm0;
+
+ vpxor %xmm0, %xmm2, %xmm2;
+ vmovq %xmm2, cmll_sub(23, CTX);
+
+ /* tl = subl(23) ^ (subr(23) & ~subr(25)); */
+ vpandn %xmm3, %xmm5, %xmm15;
+ vpsrldq $4, %xmm15, %xmm15;
+ vpxor %xmm15, %xmm3, %xmm0;
+ /* dw = tl & subl(25), tr = subr(23) ^ CAMELLIA_RL1(dw); */
+ vpand %xmm5, %xmm0, %xmm15;
+ vpslld $1, %xmm15, %xmm14;
+ vpsrld $31, %xmm15, %xmm15;
+ vpaddd %xmm14, %xmm15, %xmm15;
+ vpslldq $12, %xmm15, %xmm15;
+ vpsrldq $8, %xmm15, %xmm15;
+ vpxor %xmm15, %xmm0, %xmm0;
+
+ vpxor %xmm7, %xmm0, %xmm0;
+ vpxor %xmm8, %xmm6, %xmm6;
+ vpxor %xmm9, %xmm7, %xmm7;
+ vpxor %xmm10, %xmm8, %xmm8;
+ vpxor %xmm11, %xmm9, %xmm9;
+ vpxor %xmm12, %xmm11, %xmm11;
+
+ vmovq %xmm0, cmll_sub(26, CTX);
+ vmovq %xmm6, cmll_sub(27, CTX);
+ vmovq %xmm7, cmll_sub(28, CTX);
+ vmovq %xmm8, cmll_sub(29, CTX);
+ vmovq %xmm9, cmll_sub(30, CTX);
+ vmovq %xmm10, cmll_sub(31, CTX);
+ vmovq %xmm11, cmll_sub(32, CTX);
+
+ /* kw2 and kw4 are unused now. */
+ movq $0, cmll_sub(1, CTX);
+ movq $0, cmll_sub(33, CTX);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __camellia_avx_setup256,.-__camellia_avx_setup256;)
+
+.align 8
+.globl _gcry_camellia_aesni_avx_keygen
+ELF(.type _gcry_camellia_aesni_avx_keygen,@function;)
+
+_gcry_camellia_aesni_avx_keygen:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: key
+ * %rdx: keylen
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ vmovdqu (%rsi), %xmm0;
+ cmpl $24, %edx;
+ jb __camellia_avx_setup128;
+ je .Lprepare_key192;
+
+ vmovdqu 16(%rsi), %xmm1;
+ jmp __camellia_avx_setup256;
+
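+ /* 192-bit keys: per the Camellia specification the missing 64 bits of
+ * KR are the complement of the supplied ones (KR = kr || ~kr), after
+ * which the 256-bit setup path is reused unchanged. */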
+.Lprepare_key192:
+ vpcmpeqd %xmm2, %xmm2, %xmm2;
+ vmovq 16(%rsi), %xmm1;
+
+ vpxor %xmm1, %xmm2, %xmm2;
+ vpslldq $8, %xmm2, %xmm2;
+ vpor %xmm2, %xmm1, %xmm1;
+
+ jmp __camellia_avx_setup256;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_keygen,.-_gcry_camellia_aesni_avx_keygen;)
+
+#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/camellia-aesni-avx2-amd64.S b/comm/third_party/libgcrypt/cipher/camellia-aesni-avx2-amd64.S
new file mode 100644
index 0000000000..f620f04036
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/camellia-aesni-avx2-amd64.S
@@ -0,0 +1,1782 @@
+/* camellia-aesni-avx2-amd64.S - AES-NI/AVX2 implementation of Camellia cipher
+ *
+ * Copyright (C) 2013-2015,2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#ifdef __x86_64
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
+
+#include "asm-common-amd64.h"
+
+#define CAMELLIA_TABLE_BYTE_LEN 272
+
+/* struct CAMELLIA_context: */
+#define key_table 0
+#define key_bitlength CAMELLIA_TABLE_BYTE_LEN
+
+/* register macros */
+#define CTX %rdi
+#define RIO %r8
+
+/**********************************************************************
+ helper macros
+ **********************************************************************/
+#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpand x, mask4bit, tmp0; \
+ vpandn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
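+/*
+ * filter_8bit evaluates f(x) = lo_t[x & 0x0f] ^ hi_t[x >> 4] on every
+ * byte; this nibble-split pshufb form is what lets the affine pre/post
+ * filters around AESENCLAST be stored as two 16-byte tables each.
+ */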
+
+#define ymm0_x xmm0
+#define ymm1_x xmm1
+#define ymm2_x xmm2
+#define ymm3_x xmm3
+#define ymm4_x xmm4
+#define ymm5_x xmm5
+#define ymm6_x xmm6
+#define ymm7_x xmm7
+#define ymm8_x xmm8
+#define ymm9_x xmm9
+#define ymm10_x xmm10
+#define ymm11_x xmm11
+#define ymm12_x xmm12
+#define ymm13_x xmm13
+#define ymm14_x xmm14
+#define ymm15_x xmm15
+
+/**********************************************************************
+ 32-way camellia
+ **********************************************************************/
+
+/*
+ * IN:
+ * x0..x7: byte-sliced AB state
+ * mem_cd: register pointer storing CD state
+ * key: index for key material
+ * OUT:
+ * x0..x7: new byte-sliced CD state
+ */
+#define roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \
+ t7, mem_cd, key) \
+ /* \
+ * S-function with AES subbytes \
+ */ \
+ vbroadcasti128 .Linv_shift_row rRIP, t4; \
+ vpbroadcastd .L0f0f0f0f rRIP, t7; \
+ vbroadcasti128 .Lpre_tf_lo_s1 rRIP, t5; \
+ vbroadcasti128 .Lpre_tf_hi_s1 rRIP, t6; \
+ vbroadcasti128 .Lpre_tf_lo_s4 rRIP, t2; \
+ vbroadcasti128 .Lpre_tf_hi_s4 rRIP, t3; \
+ \
+ /* AES inverse shift rows */ \
+ vpshufb t4, x0, x0; \
+ vpshufb t4, x7, x7; \
+ vpshufb t4, x3, x3; \
+ vpshufb t4, x6, x6; \
+ vpshufb t4, x2, x2; \
+ vpshufb t4, x5, x5; \
+ vpshufb t4, x1, x1; \
+ vpshufb t4, x4, x4; \
+ \
+ /* prefilter sboxes 1, 2 and 3 */ \
+ /* prefilter sbox 4 */ \
+ filter_8bit(x0, t5, t6, t7, t4); \
+ filter_8bit(x7, t5, t6, t7, t4); \
+ vextracti128 $1, x0, t0##_x; \
+ vextracti128 $1, x7, t1##_x; \
+ filter_8bit(x3, t2, t3, t7, t4); \
+ filter_8bit(x6, t2, t3, t7, t4); \
+ vextracti128 $1, x3, t3##_x; \
+ vextracti128 $1, x6, t2##_x; \
+ filter_8bit(x2, t5, t6, t7, t4); \
+ filter_8bit(x5, t5, t6, t7, t4); \
+ filter_8bit(x1, t5, t6, t7, t4); \
+ filter_8bit(x4, t5, t6, t7, t4); \
+ \
+ vpxor t4##_x, t4##_x, t4##_x; \
+ \
+ /* AES subbytes + AES shift rows */ \
+ vextracti128 $1, x2, t6##_x; \
+ vextracti128 $1, x5, t5##_x; \
+ vaesenclast t4##_x, x0##_x, x0##_x; \
+ vaesenclast t4##_x, t0##_x, t0##_x; \
+ vaesenclast t4##_x, x7##_x, x7##_x; \
+ vaesenclast t4##_x, t1##_x, t1##_x; \
+ vaesenclast t4##_x, x3##_x, x3##_x; \
+ vaesenclast t4##_x, t3##_x, t3##_x; \
+ vaesenclast t4##_x, x6##_x, x6##_x; \
+ vaesenclast t4##_x, t2##_x, t2##_x; \
+ vinserti128 $1, t0##_x, x0, x0; \
+ vinserti128 $1, t1##_x, x7, x7; \
+ vinserti128 $1, t3##_x, x3, x3; \
+ vinserti128 $1, t2##_x, x6, x6; \
+ vextracti128 $1, x1, t3##_x; \
+ vextracti128 $1, x4, t2##_x; \
+ vbroadcasti128 .Lpost_tf_lo_s1 rRIP, t0; \
+ vbroadcasti128 .Lpost_tf_hi_s1 rRIP, t1; \
+ vaesenclast t4##_x, x2##_x, x2##_x; \
+ vaesenclast t4##_x, t6##_x, t6##_x; \
+ vaesenclast t4##_x, x5##_x, x5##_x; \
+ vaesenclast t4##_x, t5##_x, t5##_x; \
+ vaesenclast t4##_x, x1##_x, x1##_x; \
+ vaesenclast t4##_x, t3##_x, t3##_x; \
+ vaesenclast t4##_x, x4##_x, x4##_x; \
+ vaesenclast t4##_x, t2##_x, t2##_x; \
+ vinserti128 $1, t6##_x, x2, x2; \
+ vinserti128 $1, t5##_x, x5, x5; \
+ vinserti128 $1, t3##_x, x1, x1; \
+ vinserti128 $1, t2##_x, x4, x4; \
+ \
+ /* postfilter sboxes 1 and 4 */ \
+ vbroadcasti128 .Lpost_tf_lo_s3 rRIP, t2; \
+ vbroadcasti128 .Lpost_tf_hi_s3 rRIP, t3; \
+ filter_8bit(x0, t0, t1, t7, t4); \
+ filter_8bit(x7, t0, t1, t7, t4); \
+ filter_8bit(x3, t0, t1, t7, t6); \
+ filter_8bit(x6, t0, t1, t7, t6); \
+ \
+ /* postfilter sbox 3 */ \
+ vbroadcasti128 .Lpost_tf_lo_s2 rRIP, t4; \
+ vbroadcasti128 .Lpost_tf_hi_s2 rRIP, t5; \
+ filter_8bit(x2, t2, t3, t7, t6); \
+ filter_8bit(x5, t2, t3, t7, t6); \
+ \
+ vpbroadcastq key, t0; /* higher 64-bit duplicate ignored */ \
+ \
+ /* postfilter sbox 2 */ \
+ filter_8bit(x1, t4, t5, t7, t2); \
+ filter_8bit(x4, t4, t5, t7, t2); \
+ vpxor t7, t7, t7; \
+ \
+ vpsrldq $1, t0, t1; \
+ vpsrldq $2, t0, t2; \
+ vpshufb t7, t1, t1; \
+ vpsrldq $3, t0, t3; \
+ \
+ /* P-function */ \
+ vpxor x5, x0, x0; \
+ vpxor x6, x1, x1; \
+ vpxor x7, x2, x2; \
+ vpxor x4, x3, x3; \
+ \
+ vpshufb t7, t2, t2; \
+ vpsrldq $4, t0, t4; \
+ vpshufb t7, t3, t3; \
+ vpsrldq $5, t0, t5; \
+ vpshufb t7, t4, t4; \
+ \
+ vpxor x2, x4, x4; \
+ vpxor x3, x5, x5; \
+ vpxor x0, x6, x6; \
+ vpxor x1, x7, x7; \
+ \
+ vpsrldq $6, t0, t6; \
+ vpshufb t7, t5, t5; \
+ vpshufb t7, t6, t6; \
+ \
+ vpxor x7, x0, x0; \
+ vpxor x4, x1, x1; \
+ vpxor x5, x2, x2; \
+ vpxor x6, x3, x3; \
+ \
+ vpxor x3, x4, x4; \
+ vpxor x0, x5, x5; \
+ vpxor x1, x6, x6; \
+ vpxor x2, x7, x7; /* note: high and low parts swapped */ \
+ \
+ /* Add key material and result to CD (x becomes new CD) */ \
+ \
+ vpxor t6, x1, x1; \
+ vpxor 5 * 32(mem_cd), x1, x1; \
+ \
+ vpsrldq $7, t0, t6; \
+ vpshufb t7, t0, t0; \
+ vpshufb t7, t6, t7; \
+ \
+ vpxor t7, x0, x0; \
+ vpxor 4 * 32(mem_cd), x0, x0; \
+ \
+ vpxor t5, x2, x2; \
+ vpxor 6 * 32(mem_cd), x2, x2; \
+ \
+ vpxor t4, x3, x3; \
+ vpxor 7 * 32(mem_cd), x3, x3; \
+ \
+ vpxor t3, x4, x4; \
+ vpxor 0 * 32(mem_cd), x4, x4; \
+ \
+ vpxor t2, x5, x5; \
+ vpxor 1 * 32(mem_cd), x5, x5; \
+ \
+ vpxor t1, x6, x6; \
+ vpxor 2 * 32(mem_cd), x6, x6; \
+ \
+ vpxor t0, x7, x7; \
+ vpxor 3 * 32(mem_cd), x7, x7;
+
+/*
+ * IN/OUT:
+ * x0..x7: byte-sliced AB state preloaded
+ * mem_ab: byte-sliced AB state in memory
+ * mem_cd: byte-sliced CD state in memory
+ */
+#define two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, i, dir, store_ab) \
+ roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_cd, (key_table + (i) * 8)(CTX)); \
+ \
+ vmovdqu x0, 4 * 32(mem_cd); \
+ vmovdqu x1, 5 * 32(mem_cd); \
+ vmovdqu x2, 6 * 32(mem_cd); \
+ vmovdqu x3, 7 * 32(mem_cd); \
+ vmovdqu x4, 0 * 32(mem_cd); \
+ vmovdqu x5, 1 * 32(mem_cd); \
+ vmovdqu x6, 2 * 32(mem_cd); \
+ vmovdqu x7, 3 * 32(mem_cd); \
+ \
+ roundsm32(x4, x5, x6, x7, x0, x1, x2, x3, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, (key_table + ((i) + (dir)) * 8)(CTX)); \
+ \
+ store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab);
+
+#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */
+
+#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \
+ /* Store new AB state */ \
+ vmovdqu x4, 4 * 32(mem_ab); \
+ vmovdqu x5, 5 * 32(mem_ab); \
+ vmovdqu x6, 6 * 32(mem_ab); \
+ vmovdqu x7, 7 * 32(mem_ab); \
+ vmovdqu x0, 0 * 32(mem_ab); \
+ vmovdqu x1, 1 * 32(mem_ab); \
+ vmovdqu x2, 2 * 32(mem_ab); \
+ vmovdqu x3, 3 * 32(mem_ab);
+
+#define enc_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, i) \
+ two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \
+ two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \
+ two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store);
+
+#define dec_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, i) \
+ two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \
+ two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \
+ two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store);
+
+/*
+ * IN:
+ * v0..3: byte-sliced 32-bit integers
+ * OUT:
+ * v0..3: (IN <<< 1)
+ */
+#define rol32_1_32(v0, v1, v2, v3, t0, t1, t2, zero) \
+ vpcmpgtb v0, zero, t0; \
+ vpaddb v0, v0, v0; \
+ vpabsb t0, t0; \
+ \
+ vpcmpgtb v1, zero, t1; \
+ vpaddb v1, v1, v1; \
+ vpabsb t1, t1; \
+ \
+ vpcmpgtb v2, zero, t2; \
+ vpaddb v2, v2, v2; \
+ vpabsb t2, t2; \
+ \
+ vpor t0, v1, v1; \
+ \
+ vpcmpgtb v3, zero, t0; \
+ vpaddb v3, v3, v3; \
+ vpabsb t0, t0; \
+ \
+ vpor t1, v2, v2; \
+ vpor t2, v3, v3; \
+ vpor t0, v0, v0;
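+/*
+ * Per byte plane: vpcmpgtb against zero exposes each byte's top bit,
+ * vpaddb shifts every byte left by one, vpabsb turns the carry mask into
+ * 0/1, and the carries are or'ed into the neighbouring plane (wrapping
+ * around), giving the 1-bit left rotate of the byte-sliced 32-bit words.
+ */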
+
+/*
+ * IN:
+ * r: byte-sliced AB state in memory
+ * l: byte-sliced CD state in memory
+ * OUT:
+ * x0..x7: new byte-sliced CD state
+ */
+#define fls32(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \
+ tt1, tt2, tt3, kll, klr, krl, krr) \
+ /* \
+ * t0 = kll; \
+ * t0 &= ll; \
+ * lr ^= rol32(t0, 1); \
+ */ \
+ vpbroadcastd kll, t0; /* only lowest 32-bit used */ \
+ vpxor tt0, tt0, tt0; \
+ vpshufb tt0, t0, t3; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t2; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t1; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t0; \
+ \
+ vpand l0, t0, t0; \
+ vpand l1, t1, t1; \
+ vpand l2, t2, t2; \
+ vpand l3, t3, t3; \
+ \
+ rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
+ \
+ vpxor l4, t0, l4; \
+ vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
+ vmovdqu l4, 4 * 32(l); \
+ vpxor l5, t1, l5; \
+ vmovdqu l5, 5 * 32(l); \
+ vpxor l6, t2, l6; \
+ vmovdqu l6, 6 * 32(l); \
+ vpxor l7, t3, l7; \
+ vmovdqu l7, 7 * 32(l); \
+ \
+ /* \
+ * t2 = krr; \
+ * t2 |= rr; \
+ * rl ^= t2; \
+ */ \
+ \
+ vpshufb tt0, t0, t3; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t2; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t1; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t0; \
+ \
+ vpor 4 * 32(r), t0, t0; \
+ vpor 5 * 32(r), t1, t1; \
+ vpor 6 * 32(r), t2, t2; \
+ vpor 7 * 32(r), t3, t3; \
+ \
+ vpxor 0 * 32(r), t0, t0; \
+ vpxor 1 * 32(r), t1, t1; \
+ vpxor 2 * 32(r), t2, t2; \
+ vpxor 3 * 32(r), t3, t3; \
+ vmovdqu t0, 0 * 32(r); \
+ vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
+ vmovdqu t1, 1 * 32(r); \
+ vmovdqu t2, 2 * 32(r); \
+ vmovdqu t3, 3 * 32(r); \
+ \
+ /* \
+ * t2 = krl; \
+ * t2 &= rl; \
+ * rr ^= rol32(t2, 1); \
+ */ \
+ vpshufb tt0, t0, t3; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t2; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t1; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t0; \
+ \
+ vpand 0 * 32(r), t0, t0; \
+ vpand 1 * 32(r), t1, t1; \
+ vpand 2 * 32(r), t2, t2; \
+ vpand 3 * 32(r), t3, t3; \
+ \
+ rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
+ \
+ vpxor 4 * 32(r), t0, t0; \
+ vpxor 5 * 32(r), t1, t1; \
+ vpxor 6 * 32(r), t2, t2; \
+ vpxor 7 * 32(r), t3, t3; \
+ vmovdqu t0, 4 * 32(r); \
+ vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
+ vmovdqu t1, 5 * 32(r); \
+ vmovdqu t2, 6 * 32(r); \
+ vmovdqu t3, 7 * 32(r); \
+ \
+ /* \
+ * t0 = klr; \
+ * t0 |= lr; \
+ * ll ^= t0; \
+ */ \
+ \
+ vpshufb tt0, t0, t3; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t2; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t1; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t0; \
+ \
+ vpor l4, t0, t0; \
+ vpor l5, t1, t1; \
+ vpor l6, t2, t2; \
+ vpor l7, t3, t3; \
+ \
+ vpxor l0, t0, l0; \
+ vmovdqu l0, 0 * 32(l); \
+ vpxor l1, t1, l1; \
+ vmovdqu l1, 1 * 32(l); \
+ vpxor l2, t2, l2; \
+ vmovdqu l2, 2 * 32(l); \
+ vpxor l3, t3, l3; \
+ vmovdqu l3, 3 * 32(l);
+
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+#define byteslice_16x16b_fast(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, \
+ a3, b3, c3, d3, st0, st1) \
+ vmovdqu d2, st0; \
+ vmovdqu d3, st1; \
+ transpose_4x4(a0, a1, a2, a3, d2, d3); \
+ transpose_4x4(b0, b1, b2, b3, d2, d3); \
+ vmovdqu st0, d2; \
+ vmovdqu st1, d3; \
+ \
+ vmovdqu a0, st0; \
+ vmovdqu a1, st1; \
+ transpose_4x4(c0, c1, c2, c3, a0, a1); \
+ transpose_4x4(d0, d1, d2, d3, a0, a1); \
+ \
+ vbroadcasti128 .Lshufb_16x16b rRIP, a0; \
+ vmovdqu st1, a1; \
+ vpshufb a0, a2, a2; \
+ vpshufb a0, a3, a3; \
+ vpshufb a0, b0, b0; \
+ vpshufb a0, b1, b1; \
+ vpshufb a0, b2, b2; \
+ vpshufb a0, b3, b3; \
+ vpshufb a0, a1, a1; \
+ vpshufb a0, c0, c0; \
+ vpshufb a0, c1, c1; \
+ vpshufb a0, c2, c2; \
+ vpshufb a0, c3, c3; \
+ vpshufb a0, d0, d0; \
+ vpshufb a0, d1, d1; \
+ vpshufb a0, d2, d2; \
+ vpshufb a0, d3, d3; \
+ vmovdqu d3, st1; \
+ vmovdqu st0, d3; \
+ vpshufb a0, d3, a0; \
+ vmovdqu d2, st0; \
+ \
+ transpose_4x4(a0, b0, c0, d0, d2, d3); \
+ transpose_4x4(a1, b1, c1, d1, d2, d3); \
+ vmovdqu st0, d2; \
+ vmovdqu st1, d3; \
+ \
+ vmovdqu b0, st0; \
+ vmovdqu b1, st1; \
+ transpose_4x4(a2, b2, c2, d2, b0, b1); \
+ transpose_4x4(a3, b3, c3, d3, b0, b1); \
+ vmovdqu st0, b0; \
+ vmovdqu st1, b1; \
+ /* does not adjust output bytes inside vectors */
+
+/* load blocks to registers and apply pre-whitening */
+#define inpack32_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, rio, key) \
+ vpbroadcastq key, x0; \
+ vpshufb .Lpack_bswap rRIP, x0, x0; \
+ \
+ vpxor 0 * 32(rio), x0, y7; \
+ vpxor 1 * 32(rio), x0, y6; \
+ vpxor 2 * 32(rio), x0, y5; \
+ vpxor 3 * 32(rio), x0, y4; \
+ vpxor 4 * 32(rio), x0, y3; \
+ vpxor 5 * 32(rio), x0, y2; \
+ vpxor 6 * 32(rio), x0, y1; \
+ vpxor 7 * 32(rio), x0, y0; \
+ vpxor 8 * 32(rio), x0, x7; \
+ vpxor 9 * 32(rio), x0, x6; \
+ vpxor 10 * 32(rio), x0, x5; \
+ vpxor 11 * 32(rio), x0, x4; \
+ vpxor 12 * 32(rio), x0, x3; \
+ vpxor 13 * 32(rio), x0, x2; \
+ vpxor 14 * 32(rio), x0, x1; \
+ vpxor 15 * 32(rio), x0, x0;
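+/*
+ * The 64-bit pre-whitening key is broadcast to every 128-bit lane,
+ * repacked by .Lpack_bswap (low qword byte-swapped per 32-bit word, high
+ * qword cleared), and xored into all 32 blocks as they are loaded.
+ */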
+
+/* byteslice pre-whitened blocks and store to temporary memory */
+#define inpack32_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd) \
+ byteslice_16x16b_fast(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, \
+ y4, y5, y6, y7, (mem_ab), (mem_cd)); \
+ \
+ vmovdqu x0, 0 * 32(mem_ab); \
+ vmovdqu x1, 1 * 32(mem_ab); \
+ vmovdqu x2, 2 * 32(mem_ab); \
+ vmovdqu x3, 3 * 32(mem_ab); \
+ vmovdqu x4, 4 * 32(mem_ab); \
+ vmovdqu x5, 5 * 32(mem_ab); \
+ vmovdqu x6, 6 * 32(mem_ab); \
+ vmovdqu x7, 7 * 32(mem_ab); \
+ vmovdqu y0, 0 * 32(mem_cd); \
+ vmovdqu y1, 1 * 32(mem_cd); \
+ vmovdqu y2, 2 * 32(mem_cd); \
+ vmovdqu y3, 3 * 32(mem_cd); \
+ vmovdqu y4, 4 * 32(mem_cd); \
+ vmovdqu y5, 5 * 32(mem_cd); \
+ vmovdqu y6, 6 * 32(mem_cd); \
+ vmovdqu y7, 7 * 32(mem_cd);
+
+/* de-byteslice, apply post-whitening and store blocks */
+#define outunpack32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \
+ y5, y6, y7, key, stack_tmp0, stack_tmp1) \
+ byteslice_16x16b_fast(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, \
+ y3, y7, x3, x7, stack_tmp0, stack_tmp1); \
+ \
+ vmovdqu x0, stack_tmp0; \
+ \
+ vpbroadcastq key, x0; \
+ vpshufb .Lpack_bswap rRIP, x0, x0; \
+ \
+ vpxor x0, y7, y7; \
+ vpxor x0, y6, y6; \
+ vpxor x0, y5, y5; \
+ vpxor x0, y4, y4; \
+ vpxor x0, y3, y3; \
+ vpxor x0, y2, y2; \
+ vpxor x0, y1, y1; \
+ vpxor x0, y0, y0; \
+ vpxor x0, x7, x7; \
+ vpxor x0, x6, x6; \
+ vpxor x0, x5, x5; \
+ vpxor x0, x4, x4; \
+ vpxor x0, x3, x3; \
+ vpxor x0, x2, x2; \
+ vpxor x0, x1, x1; \
+ vpxor stack_tmp0, x0, x0;
+
+#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, rio) \
+ vmovdqu x0, 0 * 32(rio); \
+ vmovdqu x1, 1 * 32(rio); \
+ vmovdqu x2, 2 * 32(rio); \
+ vmovdqu x3, 3 * 32(rio); \
+ vmovdqu x4, 4 * 32(rio); \
+ vmovdqu x5, 5 * 32(rio); \
+ vmovdqu x6, 6 * 32(rio); \
+ vmovdqu x7, 7 * 32(rio); \
+ vmovdqu y0, 8 * 32(rio); \
+ vmovdqu y1, 9 * 32(rio); \
+ vmovdqu y2, 10 * 32(rio); \
+ vmovdqu y3, 11 * 32(rio); \
+ vmovdqu y4, 12 * 32(rio); \
+ vmovdqu y5, 13 * 32(rio); \
+ vmovdqu y6, 14 * 32(rio); \
+ vmovdqu y7, 15 * 32(rio);
+
+.text
+.align 32
+
+#define SHUFB_BYTES(idx) \
+ 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
+
+.Lshufb_16x16b:
+ .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
+ .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
+
+.Lpack_bswap:
+ .long 0x00010203, 0x04050607, 0x80808080, 0x80808080
+ .long 0x00010203, 0x04050607, 0x80808080, 0x80808080
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/*
+ * pre-SubByte transform
+ *
+ * pre-lookup for sbox1, sbox2, sbox3:
+ * swap_bitendianness(
+ * isom_map_camellia_to_aes(
+ * camellia_f(
+ * swap_bitendianness(in)
+ * )
+ * )
+ * )
+ *
+ * (note: '⊕ 0xc5' inside camellia_f())
+ */
+.Lpre_tf_lo_s1:
+ .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86
+ .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88
+.Lpre_tf_hi_s1:
+ .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a
+ .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23
+
+/*
+ * pre-SubByte transform
+ *
+ * pre-lookup for sbox4:
+ * swap_bitendianness(
+ * isom_map_camellia_to_aes(
+ * camellia_f(
+ * swap_bitendianness(in <<< 1)
+ * )
+ * )
+ * )
+ *
+ * (note: '⊕ 0xc5' inside camellia_f())
+ */
+.Lpre_tf_lo_s4:
+ .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25
+ .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74
+.Lpre_tf_hi_s4:
+ .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72
+ .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox1, sbox4:
+ * swap_bitendianness(
+ * camellia_h(
+ * isom_map_aes_to_camellia(
+ * swap_bitendianness(
+ * aes_inverse_affine_transform(in)
+ * )
+ * )
+ * )
+ * )
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s1:
+ .byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31
+ .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1
+.Lpost_tf_hi_s1:
+ .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8
+ .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox2:
+ * swap_bitendianness(
+ * camellia_h(
+ * isom_map_aes_to_camellia(
+ * swap_bitendianness(
+ * aes_inverse_affine_transform(in)
+ * )
+ * )
+ * )
+ * ) <<< 1
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s2:
+ .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62
+ .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3
+.Lpost_tf_hi_s2:
+ .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51
+ .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox3:
+ * swap_bitendianness(
+ * camellia_h(
+ * isom_map_aes_to_camellia(
+ * swap_bitendianness(
+ * aes_inverse_affine_transform(in)
+ * )
+ * )
+ * )
+ * ) >>> 1
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s3:
+ .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98
+ .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8
+.Lpost_tf_hi_s3:
+ .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54
+ .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+ .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+ .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+ .long 0x0f0f0f0f
+
+
+.align 8
+ELF(.type __camellia_enc_blk32,@function;)
+
+__camellia_enc_blk32:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rax: temporary storage, 512 bytes
+ * %r8d: 24 for 16 byte key, 32 for larger
+ * %ymm0..%ymm15: 32 plaintext blocks
+ * output:
+ * %ymm0..%ymm15: 32 encrypted blocks, order swapped:
+ * 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
+ */
+ CFI_STARTPROC();
+
+ leaq 8 * 32(%rax), %rcx;
+
+ leaq (-8 * 8)(CTX, %r8, 8), %r8;
+
+ inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %rcx);
+
+.align 8
+.Lenc_loop:
+ enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %rcx, 0);
+
+ cmpq %r8, CTX;
+ je .Lenc_done;
+ leaq (8 * 8)(CTX), CTX;
+
+ fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15,
+ ((key_table) + 0)(CTX),
+ ((key_table) + 4)(CTX),
+ ((key_table) + 8)(CTX),
+ ((key_table) + 12)(CTX));
+ jmp .Lenc_loop;
+
+.align 8
+.Lenc_done:
+ /* load CD for output */
+ vmovdqu 0 * 32(%rcx), %ymm8;
+ vmovdqu 1 * 32(%rcx), %ymm9;
+ vmovdqu 2 * 32(%rcx), %ymm10;
+ vmovdqu 3 * 32(%rcx), %ymm11;
+ vmovdqu 4 * 32(%rcx), %ymm12;
+ vmovdqu 5 * 32(%rcx), %ymm13;
+ vmovdqu 6 * 32(%rcx), %ymm14;
+ vmovdqu 7 * 32(%rcx), %ymm15;
+
+ outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, ((key_table) + 8 * 8)(%r8), (%rax), 1 * 32(%rax));
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __camellia_enc_blk32,.-__camellia_enc_blk32;)
+
+.align 8
+ELF(.type __camellia_dec_blk32,@function;)
+
+__camellia_dec_blk32:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rax: temporary storage, 512 bytes
+ * %r8d: 24 for 16 byte key, 32 for larger
+	 *	%ymm0..%ymm15: 32 encrypted blocks
+	 * output:
+	 *	%ymm0..%ymm15: 32 plaintext blocks, order swapped:
+	 *       7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
+ */
+ CFI_STARTPROC();
+
+ movq %r8, %rcx;
+ movq CTX, %r8
+ leaq (-8 * 8)(CTX, %rcx, 8), CTX;
+
+ leaq 8 * 32(%rax), %rcx;
+
+ inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %rcx);
+
+.align 8
+.Ldec_loop:
+ dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %rcx, 0);
+
+ cmpq %r8, CTX;
+ je .Ldec_done;
+
+ fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15,
+ ((key_table) + 8)(CTX),
+ ((key_table) + 12)(CTX),
+ ((key_table) + 0)(CTX),
+ ((key_table) + 4)(CTX));
+
+ leaq (-8 * 8)(CTX), CTX;
+ jmp .Ldec_loop;
+
+.align 8
+.Ldec_done:
+ /* load CD for output */
+ vmovdqu 0 * 32(%rcx), %ymm8;
+ vmovdqu 1 * 32(%rcx), %ymm9;
+ vmovdqu 2 * 32(%rcx), %ymm10;
+ vmovdqu 3 * 32(%rcx), %ymm11;
+ vmovdqu 4 * 32(%rcx), %ymm12;
+ vmovdqu 5 * 32(%rcx), %ymm13;
+ vmovdqu 6 * 32(%rcx), %ymm14;
+ vmovdqu 7 * 32(%rcx), %ymm15;
+
+ outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax));
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __camellia_dec_blk32,.-__camellia_dec_blk32;)
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
+
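The inc_le128 macro above increments a 128-bit counter kept in little-endian lane order: vpcmpeqq yields an all-ones low lane exactly when that lane is about to wrap, vpsubq of the -1 constant adds one, and the byte-shifted mask subtraction propagates the carry into the high lane. A scalar sketch of the same logic (illustrative only):

    #include <stdint.h>

    static void inc_le128_sketch (uint64_t lane[2])
    {
      int carry = (lane[0] == UINT64_MAX); /* vpcmpeqq against the -1:0 constant */
      lane[0] += 1;                        /* vpsubq of -1 == add 1 */
      lane[1] += carry;                    /* shifted mask subtract == add carry */
    }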
+.align 8
+.globl _gcry_camellia_aesni_avx2_ctr_enc
+ELF(.type _gcry_camellia_aesni_avx2_ctr_enc,@function;)
+
+_gcry_camellia_aesni_avx2_ctr_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (32 blocks)
+ * %rdx: src (32 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ movq 8(%rcx), %r11;
+ bswapq %r11;
+
+ vzeroupper;
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %eax;
+ cmovel %eax, %r8d; /* max */
+
+ subq $(16 * 32), %rsp;
+ andq $~63, %rsp;
+ movq %rsp, %rax;
+
+ vpcmpeqd %ymm15, %ymm15, %ymm15;
+ vpsrldq $8, %ymm15, %ymm15; /* ab: -1:0 ; cd: -1:0 */
+
+ /* load IV and byteswap */
+ vmovdqu (%rcx), %xmm0;
+ vpshufb .Lbswap128_mask rRIP, %xmm0, %xmm0;
+ vmovdqa %xmm0, %xmm1;
+ inc_le128(%xmm0, %xmm15, %xmm14);
+ vbroadcasti128 .Lbswap128_mask rRIP, %ymm14;
+ vinserti128 $1, %xmm0, %ymm1, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm13;
+ vmovdqu %ymm13, 15 * 32(%rax);
+
+ /* check need for handling 64-bit overflow and carry */
+ cmpq $(0xffffffffffffffff - 32), %r11;
+ ja .Lload_ctr_carry;
+
+ /* construct IVs */
+ vpaddq %ymm15, %ymm15, %ymm15; /* ab: -2:0 ; cd: -2:0 */
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm13;
+ vmovdqu %ymm13, 14 * 32(%rax);
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm13;
+ vmovdqu %ymm13, 13 * 32(%rax);
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm12;
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm11;
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm10;
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm9;
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm8;
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm7;
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm6;
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm5;
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm4;
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm3;
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm2;
+ vpsubq %ymm15, %ymm0, %ymm0;
+ vpshufb %ymm14, %ymm0, %ymm1;
+ vpsubq %ymm15, %ymm0, %ymm0; /* +30 ; +31 */
+ vpsubq %xmm15, %xmm0, %xmm13; /* +32 */
+ vpshufb %ymm14, %ymm0, %ymm0;
+ vpshufb %xmm14, %xmm13, %xmm13;
+ vmovdqu %xmm13, (%rcx);
+
+ jmp .Lload_ctr_done;
+
+.align 4
+.Lload_ctr_carry:
+ /* construct IVs */
+ inc_le128(%ymm0, %ymm15, %ymm13); /* ab: le1 ; cd: le2 */
+ inc_le128(%ymm0, %ymm15, %ymm13); /* ab: le2 ; cd: le3 */
+ vpshufb %ymm14, %ymm0, %ymm13;
+ vmovdqu %ymm13, 14 * 32(%rax);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vpshufb %ymm14, %ymm0, %ymm13;
+ vmovdqu %ymm13, 13 * 32(%rax);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vpshufb %ymm14, %ymm0, %ymm12;
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vpshufb %ymm14, %ymm0, %ymm11;
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vpshufb %ymm14, %ymm0, %ymm10;
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vpshufb %ymm14, %ymm0, %ymm9;
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vpshufb %ymm14, %ymm0, %ymm8;
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vpshufb %ymm14, %ymm0, %ymm7;
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vpshufb %ymm14, %ymm0, %ymm6;
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vpshufb %ymm14, %ymm0, %ymm5;
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vpshufb %ymm14, %ymm0, %ymm4;
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vpshufb %ymm14, %ymm0, %ymm3;
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vpshufb %ymm14, %ymm0, %ymm2;
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vpshufb %ymm14, %ymm0, %ymm1;
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ inc_le128(%ymm0, %ymm15, %ymm13);
+ vextracti128 $1, %ymm0, %xmm13;
+ vpshufb %ymm14, %ymm0, %ymm0;
+ inc_le128(%xmm13, %xmm15, %xmm14);
+ vpshufb .Lbswap128_mask rRIP, %xmm13, %xmm13;
+ vmovdqu %xmm13, (%rcx);
+
+.align 4
+.Lload_ctr_done:
+ /* inpack16_pre: */
+ vpbroadcastq (key_table)(CTX), %ymm15;
+ vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15;
+ vpxor %ymm0, %ymm15, %ymm0;
+ vpxor %ymm1, %ymm15, %ymm1;
+ vpxor %ymm2, %ymm15, %ymm2;
+ vpxor %ymm3, %ymm15, %ymm3;
+ vpxor %ymm4, %ymm15, %ymm4;
+ vpxor %ymm5, %ymm15, %ymm5;
+ vpxor %ymm6, %ymm15, %ymm6;
+ vpxor %ymm7, %ymm15, %ymm7;
+ vpxor %ymm8, %ymm15, %ymm8;
+ vpxor %ymm9, %ymm15, %ymm9;
+ vpxor %ymm10, %ymm15, %ymm10;
+ vpxor %ymm11, %ymm15, %ymm11;
+ vpxor %ymm12, %ymm15, %ymm12;
+ vpxor 13 * 32(%rax), %ymm15, %ymm13;
+ vpxor 14 * 32(%rax), %ymm15, %ymm14;
+ vpxor 15 * 32(%rax), %ymm15, %ymm15;
+
+ call __camellia_enc_blk32;
+
+ vpxor 0 * 32(%rdx), %ymm7, %ymm7;
+ vpxor 1 * 32(%rdx), %ymm6, %ymm6;
+ vpxor 2 * 32(%rdx), %ymm5, %ymm5;
+ vpxor 3 * 32(%rdx), %ymm4, %ymm4;
+ vpxor 4 * 32(%rdx), %ymm3, %ymm3;
+ vpxor 5 * 32(%rdx), %ymm2, %ymm2;
+ vpxor 6 * 32(%rdx), %ymm1, %ymm1;
+ vpxor 7 * 32(%rdx), %ymm0, %ymm0;
+ vpxor 8 * 32(%rdx), %ymm15, %ymm15;
+ vpxor 9 * 32(%rdx), %ymm14, %ymm14;
+ vpxor 10 * 32(%rdx), %ymm13, %ymm13;
+ vpxor 11 * 32(%rdx), %ymm12, %ymm12;
+ vpxor 12 * 32(%rdx), %ymm11, %ymm11;
+ vpxor 13 * 32(%rdx), %ymm10, %ymm10;
+ vpxor 14 * 32(%rdx), %ymm9, %ymm9;
+ vpxor 15 * 32(%rdx), %ymm8, %ymm8;
+ leaq 32 * 16(%rdx), %rdx;
+
+ write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+ %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+ %ymm8, %rsi);
+
+ vzeroall;
+
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx2_ctr_enc,.-_gcry_camellia_aesni_avx2_ctr_enc;)
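In outline, the CTR routine above byteswaps the big-endian IV, uses the cmpq/ja pair to check whether the low 64 bits can take 32 increments without overflowing, and if so derives all 32 counter blocks with plain 64-bit additions (falling back to full 128-bit inc_le128 steps otherwise) before encrypting them and XORing with the source. A scalar per-block sketch of the mode, with camellia_encrypt_block() standing in for the 32-block kernel (not a real symbol in this file):

    #include <stdint.h>
    #include <stddef.h>

    extern void camellia_encrypt_block (const void *ctx, uint8_t out[16],
                                        const uint8_t in[16]); /* stand-in */

    static void ctr_crypt_sketch (const void *ctx, uint8_t *dst,
                                  const uint8_t *src, size_t nblocks,
                                  uint8_t ctr[16])
    {
      uint8_t keystream[16];
      while (nblocks--)
        {
          camellia_encrypt_block (ctx, keystream, ctr);
          for (int i = 0; i < 16; i++)
            dst[i] = src[i] ^ keystream[i];
          /* 128-bit big-endian increment, as the asm does via bswap + add. */
          for (int i = 15; i >= 0; i--)
            if (++ctr[i])
              break;
          dst += 16;
          src += 16;
        }
    }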
+
+.align 8
+.globl _gcry_camellia_aesni_avx2_cbc_dec
+ELF(.type _gcry_camellia_aesni_avx2_cbc_dec,@function;)
+
+_gcry_camellia_aesni_avx2_cbc_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (32 blocks)
+ * %rdx: src (32 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ movq %rcx, %r9;
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %eax;
+ cmovel %eax, %r8d; /* max */
+
+ subq $(16 * 32), %rsp;
+ andq $~63, %rsp;
+ movq %rsp, %rax;
+
+ inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rdx, (key_table)(CTX, %r8, 8));
+
+ call __camellia_dec_blk32;
+
+ /* XOR output with IV */
+ vmovdqu %ymm8, (%rax);
+ vmovdqu (%r9), %xmm8;
+ vinserti128 $1, (%rdx), %ymm8, %ymm8;
+ vpxor %ymm8, %ymm7, %ymm7;
+ vmovdqu (%rax), %ymm8;
+ vpxor (0 * 32 + 16)(%rdx), %ymm6, %ymm6;
+ vpxor (1 * 32 + 16)(%rdx), %ymm5, %ymm5;
+ vpxor (2 * 32 + 16)(%rdx), %ymm4, %ymm4;
+ vpxor (3 * 32 + 16)(%rdx), %ymm3, %ymm3;
+ vpxor (4 * 32 + 16)(%rdx), %ymm2, %ymm2;
+ vpxor (5 * 32 + 16)(%rdx), %ymm1, %ymm1;
+ vpxor (6 * 32 + 16)(%rdx), %ymm0, %ymm0;
+ vpxor (7 * 32 + 16)(%rdx), %ymm15, %ymm15;
+ vpxor (8 * 32 + 16)(%rdx), %ymm14, %ymm14;
+ vpxor (9 * 32 + 16)(%rdx), %ymm13, %ymm13;
+ vpxor (10 * 32 + 16)(%rdx), %ymm12, %ymm12;
+ vpxor (11 * 32 + 16)(%rdx), %ymm11, %ymm11;
+ vpxor (12 * 32 + 16)(%rdx), %ymm10, %ymm10;
+ vpxor (13 * 32 + 16)(%rdx), %ymm9, %ymm9;
+ vpxor (14 * 32 + 16)(%rdx), %ymm8, %ymm8;
+ movq (15 * 32 + 16 + 0)(%rdx), %rax;
+ movq (15 * 32 + 16 + 8)(%rdx), %rcx;
+
+ write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+ %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+ %ymm8, %rsi);
+
+ /* store new IV */
+ movq %rax, (0)(%r9);
+ movq %rcx, (8)(%r9);
+
+ vzeroall;
+
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx2_cbc_dec,.-_gcry_camellia_aesni_avx2_cbc_dec;)
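The CBC routine above decrypts all 32 blocks first and applies the chaining afterwards: each plaintext is the raw decryption XORed with the previous ciphertext (the IV for block 0), and the last ciphertext block is written back as the new IV. A scalar sketch of that dataflow, again with a stand-in block function:

    #include <stdint.h>
    #include <string.h>

    extern void camellia_decrypt_block (const void *ctx, uint8_t out[16],
                                        const uint8_t in[16]); /* stand-in */

    static void cbc_dec_sketch (const void *ctx, uint8_t *dst,
                                const uint8_t *src, size_t nblocks,
                                uint8_t iv[16])
    {
      uint8_t prev[16], tmp[16];
      memcpy (prev, iv, 16);
      while (nblocks--)
        {
          camellia_decrypt_block (ctx, tmp, src);
          for (int i = 0; i < 16; i++)
            tmp[i] ^= prev[i];      /* XOR with previous ciphertext / IV */
          memcpy (prev, src, 16);   /* keep ciphertext in case dst aliases src */
          memcpy (dst, tmp, 16);
          dst += 16;
          src += 16;
        }
      memcpy (iv, prev, 16);        /* last ciphertext becomes the new IV */
    }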
+
+.align 8
+.globl _gcry_camellia_aesni_avx2_cfb_dec
+ELF(.type _gcry_camellia_aesni_avx2_cfb_dec,@function;)
+
+_gcry_camellia_aesni_avx2_cfb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (32 blocks)
+ * %rdx: src (32 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %eax;
+ cmovel %eax, %r8d; /* max */
+
+ subq $(16 * 32), %rsp;
+ andq $~63, %rsp;
+ movq %rsp, %rax;
+
+ /* inpack16_pre: */
+ vpbroadcastq (key_table)(CTX), %ymm0;
+ vpshufb .Lpack_bswap rRIP, %ymm0, %ymm0;
+ vmovdqu (%rcx), %xmm15;
+ vinserti128 $1, (%rdx), %ymm15, %ymm15;
+ vpxor %ymm15, %ymm0, %ymm15;
+ vmovdqu (15 * 32 + 16)(%rdx), %xmm1;
+ vmovdqu %xmm1, (%rcx); /* store new IV */
+ vpxor (0 * 32 + 16)(%rdx), %ymm0, %ymm14;
+ vpxor (1 * 32 + 16)(%rdx), %ymm0, %ymm13;
+ vpxor (2 * 32 + 16)(%rdx), %ymm0, %ymm12;
+ vpxor (3 * 32 + 16)(%rdx), %ymm0, %ymm11;
+ vpxor (4 * 32 + 16)(%rdx), %ymm0, %ymm10;
+ vpxor (5 * 32 + 16)(%rdx), %ymm0, %ymm9;
+ vpxor (6 * 32 + 16)(%rdx), %ymm0, %ymm8;
+ vpxor (7 * 32 + 16)(%rdx), %ymm0, %ymm7;
+ vpxor (8 * 32 + 16)(%rdx), %ymm0, %ymm6;
+ vpxor (9 * 32 + 16)(%rdx), %ymm0, %ymm5;
+ vpxor (10 * 32 + 16)(%rdx), %ymm0, %ymm4;
+ vpxor (11 * 32 + 16)(%rdx), %ymm0, %ymm3;
+ vpxor (12 * 32 + 16)(%rdx), %ymm0, %ymm2;
+ vpxor (13 * 32 + 16)(%rdx), %ymm0, %ymm1;
+ vpxor (14 * 32 + 16)(%rdx), %ymm0, %ymm0;
+
+ call __camellia_enc_blk32;
+
+ vpxor 0 * 32(%rdx), %ymm7, %ymm7;
+ vpxor 1 * 32(%rdx), %ymm6, %ymm6;
+ vpxor 2 * 32(%rdx), %ymm5, %ymm5;
+ vpxor 3 * 32(%rdx), %ymm4, %ymm4;
+ vpxor 4 * 32(%rdx), %ymm3, %ymm3;
+ vpxor 5 * 32(%rdx), %ymm2, %ymm2;
+ vpxor 6 * 32(%rdx), %ymm1, %ymm1;
+ vpxor 7 * 32(%rdx), %ymm0, %ymm0;
+ vpxor 8 * 32(%rdx), %ymm15, %ymm15;
+ vpxor 9 * 32(%rdx), %ymm14, %ymm14;
+ vpxor 10 * 32(%rdx), %ymm13, %ymm13;
+ vpxor 11 * 32(%rdx), %ymm12, %ymm12;
+ vpxor 12 * 32(%rdx), %ymm11, %ymm11;
+ vpxor 13 * 32(%rdx), %ymm10, %ymm10;
+ vpxor 14 * 32(%rdx), %ymm9, %ymm9;
+ vpxor 15 * 32(%rdx), %ymm8, %ymm8;
+
+ write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+ %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+ %ymm8, %rsi);
+
+ vzeroall;
+
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx2_cfb_dec,.-_gcry_camellia_aesni_avx2_cfb_dec;)
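CFB decryption, as above, reuses the encryption kernel: each keystream block is the encryption of the previous ciphertext block (the IV for block 0), XORed with the current ciphertext. Scalar sketch with a stand-in block function:

    #include <stdint.h>
    #include <string.h>

    extern void camellia_encrypt_block (const void *ctx, uint8_t out[16],
                                        const uint8_t in[16]); /* stand-in */

    static void cfb_dec_sketch (const void *ctx, uint8_t *dst,
                                const uint8_t *src, size_t nblocks,
                                uint8_t iv[16])
    {
      uint8_t keystream[16];
      while (nblocks--)
        {
          camellia_encrypt_block (ctx, keystream, iv); /* E(previous ciphertext) */
          memcpy (iv, src, 16);                        /* current ciphertext -> next IV */
          for (int i = 0; i < 16; i++)
            dst[i] = keystream[i] ^ iv[i];
          dst += 16;
          src += 16;
        }
    }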
+
+.align 8
+.globl _gcry_camellia_aesni_avx2_ocb_enc
+ELF(.type _gcry_camellia_aesni_avx2_ocb_enc,@function;)
+
+_gcry_camellia_aesni_avx2_ocb_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (32 blocks)
+ * %rdx: src (32 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[32])
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ subq $(16 * 32 + 4 * 8), %rsp;
+ andq $~63, %rsp;
+ movq %rsp, %rax;
+
+ movq %r10, (16 * 32 + 0 * 8)(%rsp);
+ movq %r11, (16 * 32 + 1 * 8)(%rsp);
+ movq %r12, (16 * 32 + 2 * 8)(%rsp);
+ movq %r13, (16 * 32 + 3 * 8)(%rsp);
+ CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8);
+ CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8);
+ CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8);
+ CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8);
+
+ vmovdqu (%rcx), %xmm14;
+ vmovdqu (%r8), %xmm13;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+ vmovdqu (n * 32)(%rdx), yreg; \
+ vpxor (l0reg), %xmm14, %xmm15; \
+ vpxor (l1reg), %xmm15, %xmm14; \
+ vinserti128 $1, %xmm14, %ymm15, %ymm15; \
+ vpxor yreg, %ymm13, %ymm13; \
+ vpxor yreg, %ymm15, yreg; \
+ vmovdqu %ymm15, (n * 32)(%rsi);
+
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, %r11, %ymm0);
+ vmovdqu %ymm0, (15 * 32)(%rax);
+ OCB_INPUT(1, %r12, %r13, %ymm0);
+ vmovdqu %ymm0, (14 * 32)(%rax);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(2, %r10, %r11, %ymm0);
+ vmovdqu %ymm0, (13 * 32)(%rax);
+ OCB_INPUT(3, %r12, %r13, %ymm12);
+ movq (8 * 8)(%r9), %r10;
+ movq (9 * 8)(%r9), %r11;
+ movq (10 * 8)(%r9), %r12;
+ movq (11 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, %r11, %ymm11);
+ OCB_INPUT(5, %r12, %r13, %ymm10);
+ movq (12 * 8)(%r9), %r10;
+ movq (13 * 8)(%r9), %r11;
+ movq (14 * 8)(%r9), %r12;
+ movq (15 * 8)(%r9), %r13;
+ OCB_INPUT(6, %r10, %r11, %ymm9);
+ OCB_INPUT(7, %r12, %r13, %ymm8);
+ movq (16 * 8)(%r9), %r10;
+ movq (17 * 8)(%r9), %r11;
+ movq (18 * 8)(%r9), %r12;
+ movq (19 * 8)(%r9), %r13;
+ OCB_INPUT(8, %r10, %r11, %ymm7);
+ OCB_INPUT(9, %r12, %r13, %ymm6);
+ movq (20 * 8)(%r9), %r10;
+ movq (21 * 8)(%r9), %r11;
+ movq (22 * 8)(%r9), %r12;
+ movq (23 * 8)(%r9), %r13;
+ OCB_INPUT(10, %r10, %r11, %ymm5);
+ OCB_INPUT(11, %r12, %r13, %ymm4);
+ movq (24 * 8)(%r9), %r10;
+ movq (25 * 8)(%r9), %r11;
+ movq (26 * 8)(%r9), %r12;
+ movq (27 * 8)(%r9), %r13;
+ OCB_INPUT(12, %r10, %r11, %ymm3);
+ OCB_INPUT(13, %r12, %r13, %ymm2);
+ movq (28 * 8)(%r9), %r10;
+ movq (29 * 8)(%r9), %r11;
+ movq (30 * 8)(%r9), %r12;
+ movq (31 * 8)(%r9), %r13;
+ OCB_INPUT(14, %r10, %r11, %ymm1);
+ OCB_INPUT(15, %r12, %r13, %ymm0);
+#undef OCB_INPUT
+
+ vextracti128 $1, %ymm13, %xmm15;
+ vmovdqu %xmm14, (%rcx);
+ vpxor %xmm13, %xmm15, %xmm15;
+ vmovdqu %xmm15, (%r8);
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %r10d;
+ cmovel %r10d, %r8d; /* max */
+
+ /* inpack16_pre: */
+ vpbroadcastq (key_table)(CTX), %ymm15;
+ vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15;
+ vpxor %ymm0, %ymm15, %ymm0;
+ vpxor %ymm1, %ymm15, %ymm1;
+ vpxor %ymm2, %ymm15, %ymm2;
+ vpxor %ymm3, %ymm15, %ymm3;
+ vpxor %ymm4, %ymm15, %ymm4;
+ vpxor %ymm5, %ymm15, %ymm5;
+ vpxor %ymm6, %ymm15, %ymm6;
+ vpxor %ymm7, %ymm15, %ymm7;
+ vpxor %ymm8, %ymm15, %ymm8;
+ vpxor %ymm9, %ymm15, %ymm9;
+ vpxor %ymm10, %ymm15, %ymm10;
+ vpxor %ymm11, %ymm15, %ymm11;
+ vpxor %ymm12, %ymm15, %ymm12;
+ vpxor 13 * 32(%rax), %ymm15, %ymm13;
+ vpxor 14 * 32(%rax), %ymm15, %ymm14;
+ vpxor 15 * 32(%rax), %ymm15, %ymm15;
+
+ call __camellia_enc_blk32;
+
+ vpxor 0 * 32(%rsi), %ymm7, %ymm7;
+ vpxor 1 * 32(%rsi), %ymm6, %ymm6;
+ vpxor 2 * 32(%rsi), %ymm5, %ymm5;
+ vpxor 3 * 32(%rsi), %ymm4, %ymm4;
+ vpxor 4 * 32(%rsi), %ymm3, %ymm3;
+ vpxor 5 * 32(%rsi), %ymm2, %ymm2;
+ vpxor 6 * 32(%rsi), %ymm1, %ymm1;
+ vpxor 7 * 32(%rsi), %ymm0, %ymm0;
+ vpxor 8 * 32(%rsi), %ymm15, %ymm15;
+ vpxor 9 * 32(%rsi), %ymm14, %ymm14;
+ vpxor 10 * 32(%rsi), %ymm13, %ymm13;
+ vpxor 11 * 32(%rsi), %ymm12, %ymm12;
+ vpxor 12 * 32(%rsi), %ymm11, %ymm11;
+ vpxor 13 * 32(%rsi), %ymm10, %ymm10;
+ vpxor 14 * 32(%rsi), %ymm9, %ymm9;
+ vpxor 15 * 32(%rsi), %ymm8, %ymm8;
+
+ write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+ %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+ %ymm8, %rsi);
+
+ vzeroall;
+
+ movq (16 * 32 + 0 * 8)(%rsp), %r10;
+ movq (16 * 32 + 1 * 8)(%rsp), %r11;
+ movq (16 * 32 + 2 * 8)(%rsp), %r12;
+ movq (16 * 32 + 3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx2_ocb_enc,.-_gcry_camellia_aesni_avx2_ocb_enc;)
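Each OCB_INPUT expansion above covers two blocks per ymm register: it advances the offset twice through the precomputed L table, folds both plaintexts into the running checksum, and parks the paired offsets in the destination buffer so the XOR after __camellia_enc_blk32 can add them back. A scalar per-block sketch of that bookkeeping, following the equations quoted in the comments (encipher() and the L argument are stand-ins):

    #include <stdint.h>

    static void xor16 (uint8_t *d, const uint8_t *a, const uint8_t *b)
    {
      for (int i = 0; i < 16; i++)
        d[i] = a[i] ^ b[i];
    }

    static void ocb_enc_block_sketch (const void *ctx, uint8_t c[16],
                                      const uint8_t p[16], uint8_t offset[16],
                                      uint8_t checksum[16],
                                      const uint8_t l_ntz[16],
                                      void (*encipher) (const void *, uint8_t *,
                                                        const uint8_t *))
    {
      uint8_t tmp[16];
      xor16 (offset, offset, l_ntz);  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
      xor16 (checksum, checksum, p);  /* Checksum_i = Checksum_{i-1} xor P_i */
      xor16 (tmp, p, offset);
      encipher (ctx, c, tmp);
      xor16 (c, c, offset);           /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
    }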
+
+.align 8
+.globl _gcry_camellia_aesni_avx2_ocb_dec
+ELF(.type _gcry_camellia_aesni_avx2_ocb_dec,@function;)
+
+_gcry_camellia_aesni_avx2_ocb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (32 blocks)
+ * %rdx: src (32 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[32])
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ subq $(16 * 32 + 4 * 8), %rsp;
+ andq $~63, %rsp;
+ movq %rsp, %rax;
+
+ movq %r10, (16 * 32 + 0 * 8)(%rsp);
+ movq %r11, (16 * 32 + 1 * 8)(%rsp);
+ movq %r12, (16 * 32 + 2 * 8)(%rsp);
+ movq %r13, (16 * 32 + 3 * 8)(%rsp);
+ CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8);
+ CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8);
+ CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8);
+ CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8);
+
+ vmovdqu (%rcx), %xmm14;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+ vmovdqu (n * 32)(%rdx), yreg; \
+ vpxor (l0reg), %xmm14, %xmm15; \
+ vpxor (l1reg), %xmm15, %xmm14; \
+ vinserti128 $1, %xmm14, %ymm15, %ymm15; \
+ vpxor yreg, %ymm15, yreg; \
+ vmovdqu %ymm15, (n * 32)(%rsi);
+
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, %r11, %ymm0);
+ vmovdqu %ymm0, (15 * 32)(%rax);
+ OCB_INPUT(1, %r12, %r13, %ymm0);
+ vmovdqu %ymm0, (14 * 32)(%rax);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(2, %r10, %r11, %ymm13);
+ OCB_INPUT(3, %r12, %r13, %ymm12);
+ movq (8 * 8)(%r9), %r10;
+ movq (9 * 8)(%r9), %r11;
+ movq (10 * 8)(%r9), %r12;
+ movq (11 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, %r11, %ymm11);
+ OCB_INPUT(5, %r12, %r13, %ymm10);
+ movq (12 * 8)(%r9), %r10;
+ movq (13 * 8)(%r9), %r11;
+ movq (14 * 8)(%r9), %r12;
+ movq (15 * 8)(%r9), %r13;
+ OCB_INPUT(6, %r10, %r11, %ymm9);
+ OCB_INPUT(7, %r12, %r13, %ymm8);
+ movq (16 * 8)(%r9), %r10;
+ movq (17 * 8)(%r9), %r11;
+ movq (18 * 8)(%r9), %r12;
+ movq (19 * 8)(%r9), %r13;
+ OCB_INPUT(8, %r10, %r11, %ymm7);
+ OCB_INPUT(9, %r12, %r13, %ymm6);
+ movq (20 * 8)(%r9), %r10;
+ movq (21 * 8)(%r9), %r11;
+ movq (22 * 8)(%r9), %r12;
+ movq (23 * 8)(%r9), %r13;
+ OCB_INPUT(10, %r10, %r11, %ymm5);
+ OCB_INPUT(11, %r12, %r13, %ymm4);
+ movq (24 * 8)(%r9), %r10;
+ movq (25 * 8)(%r9), %r11;
+ movq (26 * 8)(%r9), %r12;
+ movq (27 * 8)(%r9), %r13;
+ OCB_INPUT(12, %r10, %r11, %ymm3);
+ OCB_INPUT(13, %r12, %r13, %ymm2);
+ movq (28 * 8)(%r9), %r10;
+ movq (29 * 8)(%r9), %r11;
+ movq (30 * 8)(%r9), %r12;
+ movq (31 * 8)(%r9), %r13;
+ OCB_INPUT(14, %r10, %r11, %ymm1);
+ OCB_INPUT(15, %r12, %r13, %ymm0);
+#undef OCB_INPUT
+
+ vmovdqu %xmm14, (%rcx);
+
+ movq %r8, %r10;
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %r9d;
+ cmovel %r9d, %r8d; /* max */
+
+ /* inpack16_pre: */
+ vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15;
+ vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15;
+ vpxor %ymm0, %ymm15, %ymm0;
+ vpxor %ymm1, %ymm15, %ymm1;
+ vpxor %ymm2, %ymm15, %ymm2;
+ vpxor %ymm3, %ymm15, %ymm3;
+ vpxor %ymm4, %ymm15, %ymm4;
+ vpxor %ymm5, %ymm15, %ymm5;
+ vpxor %ymm6, %ymm15, %ymm6;
+ vpxor %ymm7, %ymm15, %ymm7;
+ vpxor %ymm8, %ymm15, %ymm8;
+ vpxor %ymm9, %ymm15, %ymm9;
+ vpxor %ymm10, %ymm15, %ymm10;
+ vpxor %ymm11, %ymm15, %ymm11;
+ vpxor %ymm12, %ymm15, %ymm12;
+ vpxor %ymm13, %ymm15, %ymm13;
+ vpxor 14 * 32(%rax), %ymm15, %ymm14;
+ vpxor 15 * 32(%rax), %ymm15, %ymm15;
+
+ call __camellia_dec_blk32;
+
+ vpxor 0 * 32(%rsi), %ymm7, %ymm7;
+ vpxor 1 * 32(%rsi), %ymm6, %ymm6;
+ vpxor 2 * 32(%rsi), %ymm5, %ymm5;
+ vpxor 3 * 32(%rsi), %ymm4, %ymm4;
+ vpxor 4 * 32(%rsi), %ymm3, %ymm3;
+ vpxor 5 * 32(%rsi), %ymm2, %ymm2;
+ vpxor 6 * 32(%rsi), %ymm1, %ymm1;
+ vpxor 7 * 32(%rsi), %ymm0, %ymm0;
+ vmovdqu %ymm7, (7 * 32)(%rax);
+ vmovdqu %ymm6, (6 * 32)(%rax);
+ vpxor 8 * 32(%rsi), %ymm15, %ymm15;
+ vpxor 9 * 32(%rsi), %ymm14, %ymm14;
+ vpxor 10 * 32(%rsi), %ymm13, %ymm13;
+ vpxor 11 * 32(%rsi), %ymm12, %ymm12;
+ vpxor 12 * 32(%rsi), %ymm11, %ymm11;
+ vpxor 13 * 32(%rsi), %ymm10, %ymm10;
+ vpxor 14 * 32(%rsi), %ymm9, %ymm9;
+ vpxor 15 * 32(%rsi), %ymm8, %ymm8;
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+
+ vpxor %ymm5, %ymm7, %ymm7;
+ vpxor %ymm4, %ymm6, %ymm6;
+ vpxor %ymm3, %ymm7, %ymm7;
+ vpxor %ymm2, %ymm6, %ymm6;
+ vpxor %ymm1, %ymm7, %ymm7;
+ vpxor %ymm0, %ymm6, %ymm6;
+ vpxor %ymm15, %ymm7, %ymm7;
+ vpxor %ymm14, %ymm6, %ymm6;
+ vpxor %ymm13, %ymm7, %ymm7;
+ vpxor %ymm12, %ymm6, %ymm6;
+ vpxor %ymm11, %ymm7, %ymm7;
+ vpxor %ymm10, %ymm6, %ymm6;
+ vpxor %ymm9, %ymm7, %ymm7;
+ vpxor %ymm8, %ymm6, %ymm6;
+ vpxor %ymm7, %ymm6, %ymm7;
+
+ vextracti128 $1, %ymm7, %xmm6;
+ vpxor %xmm6, %xmm7, %xmm7;
+ vpxor (%r10), %xmm7, %xmm7;
+ vmovdqu %xmm7, (%r10);
+
+ vmovdqu 7 * 32(%rax), %ymm7;
+ vmovdqu 6 * 32(%rax), %ymm6;
+
+ write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+ %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+ %ymm8, %rsi);
+
+ vzeroall;
+
+ movq (16 * 32 + 0 * 8)(%rsp), %r10;
+ movq (16 * 32 + 1 * 8)(%rsp), %r11;
+ movq (16 * 32 + 2 * 8)(%rsp), %r12;
+ movq (16 * 32 + 3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx2_ocb_dec,.-_gcry_camellia_aesni_avx2_ocb_dec;)
+
+.align 8
+.globl _gcry_camellia_aesni_avx2_ocb_auth
+ELF(.type _gcry_camellia_aesni_avx2_ocb_auth,@function;)
+
+_gcry_camellia_aesni_avx2_ocb_auth:
+ /* input:
+ * %rdi: ctx, CTX
+	 * %rsi: abuf (32 blocks)
+ * %rdx: offset
+ * %rcx: checksum
+	 * %r8 : L pointers (void *L[32])
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ subq $(16 * 32 + 4 * 8), %rsp;
+ andq $~63, %rsp;
+ movq %rsp, %rax;
+
+ movq %r10, (16 * 32 + 0 * 8)(%rsp);
+ movq %r11, (16 * 32 + 1 * 8)(%rsp);
+ movq %r12, (16 * 32 + 2 * 8)(%rsp);
+ movq %r13, (16 * 32 + 3 * 8)(%rsp);
+ CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8);
+ CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8);
+ CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8);
+ CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8);
+
+ vmovdqu (%rdx), %xmm14;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+ vmovdqu (n * 32)(%rsi), yreg; \
+ vpxor (l0reg), %xmm14, %xmm15; \
+ vpxor (l1reg), %xmm15, %xmm14; \
+ vinserti128 $1, %xmm14, %ymm15, %ymm15; \
+ vpxor yreg, %ymm15, yreg;
+
+ movq (0 * 8)(%r8), %r10;
+ movq (1 * 8)(%r8), %r11;
+ movq (2 * 8)(%r8), %r12;
+ movq (3 * 8)(%r8), %r13;
+ OCB_INPUT(0, %r10, %r11, %ymm0);
+ vmovdqu %ymm0, (15 * 32)(%rax);
+ OCB_INPUT(1, %r12, %r13, %ymm0);
+ vmovdqu %ymm0, (14 * 32)(%rax);
+ movq (4 * 8)(%r8), %r10;
+ movq (5 * 8)(%r8), %r11;
+ movq (6 * 8)(%r8), %r12;
+ movq (7 * 8)(%r8), %r13;
+ OCB_INPUT(2, %r10, %r11, %ymm13);
+ OCB_INPUT(3, %r12, %r13, %ymm12);
+ movq (8 * 8)(%r8), %r10;
+ movq (9 * 8)(%r8), %r11;
+ movq (10 * 8)(%r8), %r12;
+ movq (11 * 8)(%r8), %r13;
+ OCB_INPUT(4, %r10, %r11, %ymm11);
+ OCB_INPUT(5, %r12, %r13, %ymm10);
+ movq (12 * 8)(%r8), %r10;
+ movq (13 * 8)(%r8), %r11;
+ movq (14 * 8)(%r8), %r12;
+ movq (15 * 8)(%r8), %r13;
+ OCB_INPUT(6, %r10, %r11, %ymm9);
+ OCB_INPUT(7, %r12, %r13, %ymm8);
+ movq (16 * 8)(%r8), %r10;
+ movq (17 * 8)(%r8), %r11;
+ movq (18 * 8)(%r8), %r12;
+ movq (19 * 8)(%r8), %r13;
+ OCB_INPUT(8, %r10, %r11, %ymm7);
+ OCB_INPUT(9, %r12, %r13, %ymm6);
+ movq (20 * 8)(%r8), %r10;
+ movq (21 * 8)(%r8), %r11;
+ movq (22 * 8)(%r8), %r12;
+ movq (23 * 8)(%r8), %r13;
+ OCB_INPUT(10, %r10, %r11, %ymm5);
+ OCB_INPUT(11, %r12, %r13, %ymm4);
+ movq (24 * 8)(%r8), %r10;
+ movq (25 * 8)(%r8), %r11;
+ movq (26 * 8)(%r8), %r12;
+ movq (27 * 8)(%r8), %r13;
+ OCB_INPUT(12, %r10, %r11, %ymm3);
+ OCB_INPUT(13, %r12, %r13, %ymm2);
+ movq (28 * 8)(%r8), %r10;
+ movq (29 * 8)(%r8), %r11;
+ movq (30 * 8)(%r8), %r12;
+ movq (31 * 8)(%r8), %r13;
+ OCB_INPUT(14, %r10, %r11, %ymm1);
+ OCB_INPUT(15, %r12, %r13, %ymm0);
+#undef OCB_INPUT
+
+ vmovdqu %xmm14, (%rdx);
+
+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %r10d;
+ cmovel %r10d, %r8d; /* max */
+
+ movq %rcx, %r10;
+
+ /* inpack16_pre: */
+ vpbroadcastq (key_table)(CTX), %ymm15;
+ vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15;
+ vpxor %ymm0, %ymm15, %ymm0;
+ vpxor %ymm1, %ymm15, %ymm1;
+ vpxor %ymm2, %ymm15, %ymm2;
+ vpxor %ymm3, %ymm15, %ymm3;
+ vpxor %ymm4, %ymm15, %ymm4;
+ vpxor %ymm5, %ymm15, %ymm5;
+ vpxor %ymm6, %ymm15, %ymm6;
+ vpxor %ymm7, %ymm15, %ymm7;
+ vpxor %ymm8, %ymm15, %ymm8;
+ vpxor %ymm9, %ymm15, %ymm9;
+ vpxor %ymm10, %ymm15, %ymm10;
+ vpxor %ymm11, %ymm15, %ymm11;
+ vpxor %ymm12, %ymm15, %ymm12;
+ vpxor %ymm13, %ymm15, %ymm13;
+ vpxor 14 * 32(%rax), %ymm15, %ymm14;
+ vpxor 15 * 32(%rax), %ymm15, %ymm15;
+
+ call __camellia_enc_blk32;
+
+ vpxor %ymm7, %ymm6, %ymm6;
+ vpxor %ymm5, %ymm4, %ymm4;
+ vpxor %ymm3, %ymm2, %ymm2;
+ vpxor %ymm1, %ymm0, %ymm0;
+ vpxor %ymm15, %ymm14, %ymm14;
+ vpxor %ymm13, %ymm12, %ymm12;
+ vpxor %ymm11, %ymm10, %ymm10;
+ vpxor %ymm9, %ymm8, %ymm8;
+
+ vpxor %ymm6, %ymm4, %ymm4;
+ vpxor %ymm2, %ymm0, %ymm0;
+ vpxor %ymm14, %ymm12, %ymm12;
+ vpxor %ymm10, %ymm8, %ymm8;
+
+ vpxor %ymm4, %ymm0, %ymm0;
+ vpxor %ymm12, %ymm8, %ymm8;
+
+ vpxor %ymm0, %ymm8, %ymm0;
+
+ vextracti128 $1, %ymm0, %xmm1;
+ vpxor (%r10), %xmm0, %xmm0;
+ vpxor %xmm0, %xmm1, %xmm0;
+ vmovdqu %xmm0, (%r10);
+
+ vzeroall;
+
+ movq (16 * 32 + 0 * 8)(%rsp), %r10;
+ movq (16 * 32 + 1 * 8)(%rsp), %r11;
+ movq (16 * 32 + 2 * 8)(%rsp), %r12;
+ movq (16 * 32 + 3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx2_ocb_auth,.-_gcry_camellia_aesni_avx2_ocb_auth;)
+
+#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/camellia-arm.S b/comm/third_party/libgcrypt/cipher/camellia-arm.S
new file mode 100644
index 0000000000..a3d87d1109
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/camellia-arm.S
@@ -0,0 +1,626 @@
+/* camellia-arm.S - ARM assembly implementation of Camellia cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+#ifdef __PIC__
+# define GET_DATA_POINTER(reg, name, rtmp) \
+ ldr reg, 1f; \
+ ldr rtmp, 2f; \
+ b 3f; \
+ 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+ 2: .word name(GOT); \
+ 3: add reg, pc, reg; \
+ ldr reg, [reg, rtmp];
+#else
+# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+/* struct camellia_ctx: */
+#define key_table 0
+
+/* register macros */
+#define CTX %r0
+#define RTAB1 %ip
+#define RTAB3 %r1
+#define RMASK %lr
+
+#define IL %r2
+#define IR %r3
+
+#define XL %r4
+#define XR %r5
+#define YL %r6
+#define YR %r7
+
+#define RT0 %r8
+#define RT1 %r9
+#define RT2 %r10
+#define RT3 %r11
+
+/* helper macros */
+#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \
+ ldrb rout, [rsrc, #((offs) + 3)]; \
+ ldrb rtmp, [rsrc, #((offs) + 2)]; \
+ orr rout, rout, rtmp, lsl #8; \
+ ldrb rtmp, [rsrc, #((offs) + 1)]; \
+ orr rout, rout, rtmp, lsl #16; \
+ ldrb rtmp, [rsrc, #((offs) + 0)]; \
+ orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \
+ mov rtmp0, rin, lsr #8; \
+ strb rin, [rdst, #((offs) + 3)]; \
+ mov rtmp1, rin, lsr #16; \
+ strb rtmp0, [rdst, #((offs) + 2)]; \
+ mov rtmp0, rin, lsr #24; \
+ strb rtmp1, [rdst, #((offs) + 1)]; \
+ strb rtmp0, [rdst, #((offs) + 0)];
+
+#ifdef __ARMEL__
+#ifdef HAVE_ARM_ARCH_V6
+ #define host_to_be(reg, rtmp) \
+ rev reg, reg;
+ #define be_to_host(reg, rtmp) \
+ rev reg, reg;
+#else
+ #define host_to_be(reg, rtmp) \
+ eor rtmp, reg, reg, ror #16; \
+ mov rtmp, rtmp, lsr #8; \
+ bic rtmp, rtmp, #65280; \
+ eor reg, rtmp, reg, ror #8;
+ #define be_to_host(reg, rtmp) \
+ eor rtmp, reg, reg, ror #16; \
+ mov rtmp, rtmp, lsr #8; \
+ bic rtmp, rtmp, #65280; \
+ eor reg, rtmp, reg, ror #8;
+#endif
+#else
+ /* nop on big-endian */
+ #define host_to_be(reg, rtmp) /*_*/
+ #define be_to_host(reg, rtmp) /*_*/
+#endif
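On pre-ARMv6 cores there is no rev instruction, so the fallback host_to_be/be_to_host above swaps bytes with an eor/lsr/bic/eor sequence. The same computation in C, for reference:

    #include <stdint.h>

    static inline uint32_t ror32 (uint32_t x, unsigned n)
    {
      return (x >> n) | (x << (32 - n));
    }

    /* Byte swap without `rev', mirroring the macro step for step. */
    static uint32_t bswap32_sketch (uint32_t x)
    {
      uint32_t t = x ^ ror32 (x, 16); /* eor rtmp, reg, reg, ror #16 */
      t >>= 8;                        /* mov rtmp, rtmp, lsr #8 */
      t &= ~0xff00u;                  /* bic rtmp, rtmp, #65280 */
      return t ^ ror32 (x, 8);        /* eor reg, rtmp, reg, ror #8 */
    }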
+
+#define ldr_input_aligned_be(rin, a, b, c, d, rtmp) \
+ ldr a, [rin, #0]; \
+ ldr b, [rin, #4]; \
+ be_to_host(a, rtmp); \
+ ldr c, [rin, #8]; \
+ be_to_host(b, rtmp); \
+ ldr d, [rin, #12]; \
+ be_to_host(c, rtmp); \
+ be_to_host(d, rtmp);
+
+#define str_output_aligned_be(rout, a, b, c, d, rtmp) \
+ be_to_host(a, rtmp); \
+ be_to_host(b, rtmp); \
+ str a, [rout, #0]; \
+ be_to_host(c, rtmp); \
+ str b, [rout, #4]; \
+ be_to_host(d, rtmp); \
+ str c, [rout, #8]; \
+ str d, [rout, #12];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+ /* unaligned word reads/writes allowed */
+ #define ldr_input_be(rin, ra, rb, rc, rd, rtmp) \
+ ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp)
+
+ #define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+ str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0)
+#else
+ /* need to handle unaligned reads/writes by byte reads */
+ #define ldr_input_be(rin, ra, rb, rc, rd, rtmp0) \
+ tst rin, #3; \
+ beq 1f; \
+ ldr_unaligned_be(ra, rin, 0, rtmp0); \
+ ldr_unaligned_be(rb, rin, 4, rtmp0); \
+ ldr_unaligned_be(rc, rin, 8, rtmp0); \
+ ldr_unaligned_be(rd, rin, 12, rtmp0); \
+ b 2f; \
+ 1:;\
+ ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp0); \
+ 2:;
+
+ #define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+ tst rout, #3; \
+ beq 1f; \
+ str_unaligned_be(ra, rout, 0, rtmp0, rtmp1); \
+ str_unaligned_be(rb, rout, 4, rtmp0, rtmp1); \
+ str_unaligned_be(rc, rout, 8, rtmp0, rtmp1); \
+ str_unaligned_be(rd, rout, 12, rtmp0, rtmp1); \
+ b 2f; \
+ 1:;\
+ str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0); \
+ 2:;
+#endif
+
+/**********************************************************************
+ 1-way camellia
+ **********************************************************************/
+#define roundsm(xl, xr, kl, kr, yl, yr) \
+ ldr RT2, [CTX, #(key_table + ((kl) * 4))]; \
+ and IR, RMASK, xr, lsl#(4); /*sp1110*/ \
+ ldr RT3, [CTX, #(key_table + ((kr) * 4))]; \
+ and IL, RMASK, xl, lsr#(24 - 4); /*sp1110*/ \
+ and RT0, RMASK, xr, lsr#(16 - 4); /*sp3033*/ \
+ ldr IR, [RTAB1, IR]; \
+ and RT1, RMASK, xl, lsr#(8 - 4); /*sp3033*/ \
+ eor yl, RT2; \
+ ldr IL, [RTAB1, IL]; \
+ eor yr, RT3; \
+ \
+ ldr RT0, [RTAB3, RT0]; \
+ add RTAB1, #4; \
+ ldr RT1, [RTAB3, RT1]; \
+ add RTAB3, #4; \
+ \
+ and RT2, RMASK, xr, lsr#(24 - 4); /*sp0222*/ \
+ and RT3, RMASK, xl, lsr#(16 - 4); /*sp0222*/ \
+ \
+ eor IR, RT0; \
+ eor IL, RT1; \
+ \
+ ldr RT2, [RTAB1, RT2]; \
+ and RT0, RMASK, xr, lsr#(8 - 4); /*sp4404*/ \
+ ldr RT3, [RTAB1, RT3]; \
+ and RT1, RMASK, xl, lsl#(4); /*sp4404*/ \
+ \
+ ldr RT0, [RTAB3, RT0]; \
+ sub RTAB1, #4; \
+ ldr RT1, [RTAB3, RT1]; \
+ sub RTAB3, #4; \
+ \
+ eor IR, RT2; \
+ eor IL, RT3; \
+ eor IR, RT0; \
+ eor IL, RT1; \
+ \
+ eor IR, IL; \
+ eor yr, yr, IL, ror#8; \
+ eor yl, IR; \
+ eor yr, IR;
+
+#define enc_rounds(n) \
+ roundsm(XL, XR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, YL, YR); \
+ roundsm(YL, YR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, XL, XR); \
+ roundsm(XL, XR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, YL, YR); \
+ roundsm(YL, YR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, XL, XR); \
+ roundsm(XL, XR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, YL, YR); \
+ roundsm(YL, YR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, XL, XR);
+
+#define dec_rounds(n) \
+ roundsm(XL, XR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, YL, YR); \
+ roundsm(YL, YR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, XL, XR); \
+ roundsm(XL, XR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, YL, YR); \
+ roundsm(YL, YR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, XL, XR); \
+ roundsm(XL, XR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, YL, YR); \
+ roundsm(YL, YR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, XL, XR);
+
+/* perform FL and FL⁻¹ */
+#define fls(ll, lr, rl, rr, kll, klr, krl, krr) \
+ ldr RT0, [CTX, #(key_table + ((kll) * 4))]; \
+ ldr RT2, [CTX, #(key_table + ((krr) * 4))]; \
+ and RT0, ll; \
+ ldr RT3, [CTX, #(key_table + ((krl) * 4))]; \
+ orr RT2, rr; \
+ ldr RT1, [CTX, #(key_table + ((klr) * 4))]; \
+ eor rl, RT2; \
+ eor lr, lr, RT0, ror#31; \
+ and RT3, rl; \
+ orr RT1, lr; \
+ eor ll, RT1; \
+ eor rr, rr, RT3, ror#31;
+
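The fls macro above applies FL to the X half and FL⁻¹ to the Y half in one pass; ror #31 is a rotate left by one. The two primitives in plain C, for reference (a sketch of the relations the interleaved assembly computes):

    #include <stdint.h>

    static inline uint32_t rol1 (uint32_t x)
    {
      return (x << 1) | (x >> 31);
    }

    /* FL: the right word is updated first, then used for the left word. */
    static void fl_sketch (uint32_t *xl, uint32_t *xr, uint32_t kl, uint32_t kr)
    {
      *xr ^= rol1 (*xl & kl);
      *xl ^= (*xr | kr);
    }

    /* FL^-1: the left word is updated first, then used for the right word. */
    static void fl_inv_sketch (uint32_t *yl, uint32_t *yr, uint32_t kl, uint32_t kr)
    {
      *yl ^= (*yr | kr);
      *yr ^= rol1 (*yl & kl);
    }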
+#define enc_fls(n) \
+ fls(XL, XR, YL, YR, \
+ (n) * 2 + 0, (n) * 2 + 1, \
+ (n) * 2 + 2, (n) * 2 + 3);
+
+#define dec_fls(n) \
+ fls(XL, XR, YL, YR, \
+ (n) * 2 + 2, (n) * 2 + 3, \
+ (n) * 2 + 0, (n) * 2 + 1);
+
+#define inpack(n) \
+ ldr_input_be(%r2, XL, XR, YL, YR, RT0); \
+ ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \
+ ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \
+ eor XL, RT0; \
+ eor XR, RT1;
+
+#define outunpack(n) \
+ ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \
+ ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \
+ eor YL, RT0; \
+ eor YR, RT1; \
+ str_output_be(%r1, YL, YR, XL, XR, RT0, RT1);
+
+.align 3
+.globl _gcry_camellia_arm_encrypt_block
+.type _gcry_camellia_arm_encrypt_block,%function;
+
+_gcry_camellia_arm_encrypt_block:
+ /* input:
+ * %r0: keytable
+ * %r1: dst
+ * %r2: src
+ * %r3: keybitlen
+ */
+ push {%r1, %r4-%r11, %ip, %lr};
+
+ GET_DATA_POINTER(RTAB1, .Lcamellia_sp1110, RTAB3);
+ mov RMASK, #0xff;
+ add RTAB3, RTAB1, #(2 * 4);
+ push {%r3};
+ mov RMASK, RMASK, lsl#4 /* byte mask */
+
+ inpack(0);
+
+ enc_rounds(0);
+ enc_fls(8);
+ enc_rounds(8);
+ enc_fls(16);
+ enc_rounds(16);
+
+ pop {RT0};
+ cmp RT0, #(16 * 8);
+ bne .Lenc_256;
+
+ pop {%r1};
+ outunpack(24);
+
+ pop {%r4-%r11, %ip, %pc};
+.ltorg
+
+.Lenc_256:
+ enc_fls(24);
+ enc_rounds(24);
+
+ pop {%r1};
+ outunpack(32);
+
+ pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;
+
+.align 3
+.globl _gcry_camellia_arm_decrypt_block
+.type _gcry_camellia_arm_decrypt_block,%function;
+
+_gcry_camellia_arm_decrypt_block:
+ /* input:
+ * %r0: keytable
+ * %r1: dst
+ * %r2: src
+ * %r3: keybitlen
+ */
+ push {%r1, %r4-%r11, %ip, %lr};
+
+ GET_DATA_POINTER(RTAB1, .Lcamellia_sp1110, RTAB3);
+ mov RMASK, #0xff;
+ add RTAB3, RTAB1, #(2 * 4);
+ mov RMASK, RMASK, lsl#4 /* byte mask */
+
+ cmp %r3, #(16 * 8);
+ bne .Ldec_256;
+
+ inpack(24);
+
+.Ldec_128:
+ dec_rounds(16);
+ dec_fls(16);
+ dec_rounds(8);
+ dec_fls(8);
+ dec_rounds(0);
+
+ pop {%r1};
+ outunpack(0);
+
+ pop {%r4-%r11, %ip, %pc};
+.ltorg
+
+.Ldec_256:
+ inpack(32);
+ dec_rounds(24);
+ dec_fls(24);
+
+ b .Ldec_128;
+.ltorg
+.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;
+
+.data
+
+/* Encryption/Decryption tables */
+.align 5
+.Lcamellia_sp1110:
+.long 0x70707000
+.Lcamellia_sp0222:
+ .long 0x00e0e0e0
+.Lcamellia_sp3033:
+ .long 0x38003838
+.Lcamellia_sp4404:
+ .long 0x70700070
+.long 0x82828200, 0x00050505, 0x41004141, 0x2c2c002c
+.long 0x2c2c2c00, 0x00585858, 0x16001616, 0xb3b300b3
+.long 0xececec00, 0x00d9d9d9, 0x76007676, 0xc0c000c0
+.long 0xb3b3b300, 0x00676767, 0xd900d9d9, 0xe4e400e4
+.long 0x27272700, 0x004e4e4e, 0x93009393, 0x57570057
+.long 0xc0c0c000, 0x00818181, 0x60006060, 0xeaea00ea
+.long 0xe5e5e500, 0x00cbcbcb, 0xf200f2f2, 0xaeae00ae
+.long 0xe4e4e400, 0x00c9c9c9, 0x72007272, 0x23230023
+.long 0x85858500, 0x000b0b0b, 0xc200c2c2, 0x6b6b006b
+.long 0x57575700, 0x00aeaeae, 0xab00abab, 0x45450045
+.long 0x35353500, 0x006a6a6a, 0x9a009a9a, 0xa5a500a5
+.long 0xeaeaea00, 0x00d5d5d5, 0x75007575, 0xeded00ed
+.long 0x0c0c0c00, 0x00181818, 0x06000606, 0x4f4f004f
+.long 0xaeaeae00, 0x005d5d5d, 0x57005757, 0x1d1d001d
+.long 0x41414100, 0x00828282, 0xa000a0a0, 0x92920092
+.long 0x23232300, 0x00464646, 0x91009191, 0x86860086
+.long 0xefefef00, 0x00dfdfdf, 0xf700f7f7, 0xafaf00af
+.long 0x6b6b6b00, 0x00d6d6d6, 0xb500b5b5, 0x7c7c007c
+.long 0x93939300, 0x00272727, 0xc900c9c9, 0x1f1f001f
+.long 0x45454500, 0x008a8a8a, 0xa200a2a2, 0x3e3e003e
+.long 0x19191900, 0x00323232, 0x8c008c8c, 0xdcdc00dc
+.long 0xa5a5a500, 0x004b4b4b, 0xd200d2d2, 0x5e5e005e
+.long 0x21212100, 0x00424242, 0x90009090, 0x0b0b000b
+.long 0xededed00, 0x00dbdbdb, 0xf600f6f6, 0xa6a600a6
+.long 0x0e0e0e00, 0x001c1c1c, 0x07000707, 0x39390039
+.long 0x4f4f4f00, 0x009e9e9e, 0xa700a7a7, 0xd5d500d5
+.long 0x4e4e4e00, 0x009c9c9c, 0x27002727, 0x5d5d005d
+.long 0x1d1d1d00, 0x003a3a3a, 0x8e008e8e, 0xd9d900d9
+.long 0x65656500, 0x00cacaca, 0xb200b2b2, 0x5a5a005a
+.long 0x92929200, 0x00252525, 0x49004949, 0x51510051
+.long 0xbdbdbd00, 0x007b7b7b, 0xde00dede, 0x6c6c006c
+.long 0x86868600, 0x000d0d0d, 0x43004343, 0x8b8b008b
+.long 0xb8b8b800, 0x00717171, 0x5c005c5c, 0x9a9a009a
+.long 0xafafaf00, 0x005f5f5f, 0xd700d7d7, 0xfbfb00fb
+.long 0x8f8f8f00, 0x001f1f1f, 0xc700c7c7, 0xb0b000b0
+.long 0x7c7c7c00, 0x00f8f8f8, 0x3e003e3e, 0x74740074
+.long 0xebebeb00, 0x00d7d7d7, 0xf500f5f5, 0x2b2b002b
+.long 0x1f1f1f00, 0x003e3e3e, 0x8f008f8f, 0xf0f000f0
+.long 0xcecece00, 0x009d9d9d, 0x67006767, 0x84840084
+.long 0x3e3e3e00, 0x007c7c7c, 0x1f001f1f, 0xdfdf00df
+.long 0x30303000, 0x00606060, 0x18001818, 0xcbcb00cb
+.long 0xdcdcdc00, 0x00b9b9b9, 0x6e006e6e, 0x34340034
+.long 0x5f5f5f00, 0x00bebebe, 0xaf00afaf, 0x76760076
+.long 0x5e5e5e00, 0x00bcbcbc, 0x2f002f2f, 0x6d6d006d
+.long 0xc5c5c500, 0x008b8b8b, 0xe200e2e2, 0xa9a900a9
+.long 0x0b0b0b00, 0x00161616, 0x85008585, 0xd1d100d1
+.long 0x1a1a1a00, 0x00343434, 0x0d000d0d, 0x04040004
+.long 0xa6a6a600, 0x004d4d4d, 0x53005353, 0x14140014
+.long 0xe1e1e100, 0x00c3c3c3, 0xf000f0f0, 0x3a3a003a
+.long 0x39393900, 0x00727272, 0x9c009c9c, 0xdede00de
+.long 0xcacaca00, 0x00959595, 0x65006565, 0x11110011
+.long 0xd5d5d500, 0x00ababab, 0xea00eaea, 0x32320032
+.long 0x47474700, 0x008e8e8e, 0xa300a3a3, 0x9c9c009c
+.long 0x5d5d5d00, 0x00bababa, 0xae00aeae, 0x53530053
+.long 0x3d3d3d00, 0x007a7a7a, 0x9e009e9e, 0xf2f200f2
+.long 0xd9d9d900, 0x00b3b3b3, 0xec00ecec, 0xfefe00fe
+.long 0x01010100, 0x00020202, 0x80008080, 0xcfcf00cf
+.long 0x5a5a5a00, 0x00b4b4b4, 0x2d002d2d, 0xc3c300c3
+.long 0xd6d6d600, 0x00adadad, 0x6b006b6b, 0x7a7a007a
+.long 0x51515100, 0x00a2a2a2, 0xa800a8a8, 0x24240024
+.long 0x56565600, 0x00acacac, 0x2b002b2b, 0xe8e800e8
+.long 0x6c6c6c00, 0x00d8d8d8, 0x36003636, 0x60600060
+.long 0x4d4d4d00, 0x009a9a9a, 0xa600a6a6, 0x69690069
+.long 0x8b8b8b00, 0x00171717, 0xc500c5c5, 0xaaaa00aa
+.long 0x0d0d0d00, 0x001a1a1a, 0x86008686, 0xa0a000a0
+.long 0x9a9a9a00, 0x00353535, 0x4d004d4d, 0xa1a100a1
+.long 0x66666600, 0x00cccccc, 0x33003333, 0x62620062
+.long 0xfbfbfb00, 0x00f7f7f7, 0xfd00fdfd, 0x54540054
+.long 0xcccccc00, 0x00999999, 0x66006666, 0x1e1e001e
+.long 0xb0b0b000, 0x00616161, 0x58005858, 0xe0e000e0
+.long 0x2d2d2d00, 0x005a5a5a, 0x96009696, 0x64640064
+.long 0x74747400, 0x00e8e8e8, 0x3a003a3a, 0x10100010
+.long 0x12121200, 0x00242424, 0x09000909, 0x00000000
+.long 0x2b2b2b00, 0x00565656, 0x95009595, 0xa3a300a3
+.long 0x20202000, 0x00404040, 0x10001010, 0x75750075
+.long 0xf0f0f000, 0x00e1e1e1, 0x78007878, 0x8a8a008a
+.long 0xb1b1b100, 0x00636363, 0xd800d8d8, 0xe6e600e6
+.long 0x84848400, 0x00090909, 0x42004242, 0x09090009
+.long 0x99999900, 0x00333333, 0xcc00cccc, 0xdddd00dd
+.long 0xdfdfdf00, 0x00bfbfbf, 0xef00efef, 0x87870087
+.long 0x4c4c4c00, 0x00989898, 0x26002626, 0x83830083
+.long 0xcbcbcb00, 0x00979797, 0xe500e5e5, 0xcdcd00cd
+.long 0xc2c2c200, 0x00858585, 0x61006161, 0x90900090
+.long 0x34343400, 0x00686868, 0x1a001a1a, 0x73730073
+.long 0x7e7e7e00, 0x00fcfcfc, 0x3f003f3f, 0xf6f600f6
+.long 0x76767600, 0x00ececec, 0x3b003b3b, 0x9d9d009d
+.long 0x05050500, 0x000a0a0a, 0x82008282, 0xbfbf00bf
+.long 0x6d6d6d00, 0x00dadada, 0xb600b6b6, 0x52520052
+.long 0xb7b7b700, 0x006f6f6f, 0xdb00dbdb, 0xd8d800d8
+.long 0xa9a9a900, 0x00535353, 0xd400d4d4, 0xc8c800c8
+.long 0x31313100, 0x00626262, 0x98009898, 0xc6c600c6
+.long 0xd1d1d100, 0x00a3a3a3, 0xe800e8e8, 0x81810081
+.long 0x17171700, 0x002e2e2e, 0x8b008b8b, 0x6f6f006f
+.long 0x04040400, 0x00080808, 0x02000202, 0x13130013
+.long 0xd7d7d700, 0x00afafaf, 0xeb00ebeb, 0x63630063
+.long 0x14141400, 0x00282828, 0x0a000a0a, 0xe9e900e9
+.long 0x58585800, 0x00b0b0b0, 0x2c002c2c, 0xa7a700a7
+.long 0x3a3a3a00, 0x00747474, 0x1d001d1d, 0x9f9f009f
+.long 0x61616100, 0x00c2c2c2, 0xb000b0b0, 0xbcbc00bc
+.long 0xdedede00, 0x00bdbdbd, 0x6f006f6f, 0x29290029
+.long 0x1b1b1b00, 0x00363636, 0x8d008d8d, 0xf9f900f9
+.long 0x11111100, 0x00222222, 0x88008888, 0x2f2f002f
+.long 0x1c1c1c00, 0x00383838, 0x0e000e0e, 0xb4b400b4
+.long 0x32323200, 0x00646464, 0x19001919, 0x78780078
+.long 0x0f0f0f00, 0x001e1e1e, 0x87008787, 0x06060006
+.long 0x9c9c9c00, 0x00393939, 0x4e004e4e, 0xe7e700e7
+.long 0x16161600, 0x002c2c2c, 0x0b000b0b, 0x71710071
+.long 0x53535300, 0x00a6a6a6, 0xa900a9a9, 0xd4d400d4
+.long 0x18181800, 0x00303030, 0x0c000c0c, 0xabab00ab
+.long 0xf2f2f200, 0x00e5e5e5, 0x79007979, 0x88880088
+.long 0x22222200, 0x00444444, 0x11001111, 0x8d8d008d
+.long 0xfefefe00, 0x00fdfdfd, 0x7f007f7f, 0x72720072
+.long 0x44444400, 0x00888888, 0x22002222, 0xb9b900b9
+.long 0xcfcfcf00, 0x009f9f9f, 0xe700e7e7, 0xf8f800f8
+.long 0xb2b2b200, 0x00656565, 0x59005959, 0xacac00ac
+.long 0xc3c3c300, 0x00878787, 0xe100e1e1, 0x36360036
+.long 0xb5b5b500, 0x006b6b6b, 0xda00dada, 0x2a2a002a
+.long 0x7a7a7a00, 0x00f4f4f4, 0x3d003d3d, 0x3c3c003c
+.long 0x91919100, 0x00232323, 0xc800c8c8, 0xf1f100f1
+.long 0x24242400, 0x00484848, 0x12001212, 0x40400040
+.long 0x08080800, 0x00101010, 0x04000404, 0xd3d300d3
+.long 0xe8e8e800, 0x00d1d1d1, 0x74007474, 0xbbbb00bb
+.long 0xa8a8a800, 0x00515151, 0x54005454, 0x43430043
+.long 0x60606000, 0x00c0c0c0, 0x30003030, 0x15150015
+.long 0xfcfcfc00, 0x00f9f9f9, 0x7e007e7e, 0xadad00ad
+.long 0x69696900, 0x00d2d2d2, 0xb400b4b4, 0x77770077
+.long 0x50505000, 0x00a0a0a0, 0x28002828, 0x80800080
+.long 0xaaaaaa00, 0x00555555, 0x55005555, 0x82820082
+.long 0xd0d0d000, 0x00a1a1a1, 0x68006868, 0xecec00ec
+.long 0xa0a0a000, 0x00414141, 0x50005050, 0x27270027
+.long 0x7d7d7d00, 0x00fafafa, 0xbe00bebe, 0xe5e500e5
+.long 0xa1a1a100, 0x00434343, 0xd000d0d0, 0x85850085
+.long 0x89898900, 0x00131313, 0xc400c4c4, 0x35350035
+.long 0x62626200, 0x00c4c4c4, 0x31003131, 0x0c0c000c
+.long 0x97979700, 0x002f2f2f, 0xcb00cbcb, 0x41410041
+.long 0x54545400, 0x00a8a8a8, 0x2a002a2a, 0xefef00ef
+.long 0x5b5b5b00, 0x00b6b6b6, 0xad00adad, 0x93930093
+.long 0x1e1e1e00, 0x003c3c3c, 0x0f000f0f, 0x19190019
+.long 0x95959500, 0x002b2b2b, 0xca00caca, 0x21210021
+.long 0xe0e0e000, 0x00c1c1c1, 0x70007070, 0x0e0e000e
+.long 0xffffff00, 0x00ffffff, 0xff00ffff, 0x4e4e004e
+.long 0x64646400, 0x00c8c8c8, 0x32003232, 0x65650065
+.long 0xd2d2d200, 0x00a5a5a5, 0x69006969, 0xbdbd00bd
+.long 0x10101000, 0x00202020, 0x08000808, 0xb8b800b8
+.long 0xc4c4c400, 0x00898989, 0x62006262, 0x8f8f008f
+.long 0x00000000, 0x00000000, 0x00000000, 0xebeb00eb
+.long 0x48484800, 0x00909090, 0x24002424, 0xcece00ce
+.long 0xa3a3a300, 0x00474747, 0xd100d1d1, 0x30300030
+.long 0xf7f7f700, 0x00efefef, 0xfb00fbfb, 0x5f5f005f
+.long 0x75757500, 0x00eaeaea, 0xba00baba, 0xc5c500c5
+.long 0xdbdbdb00, 0x00b7b7b7, 0xed00eded, 0x1a1a001a
+.long 0x8a8a8a00, 0x00151515, 0x45004545, 0xe1e100e1
+.long 0x03030300, 0x00060606, 0x81008181, 0xcaca00ca
+.long 0xe6e6e600, 0x00cdcdcd, 0x73007373, 0x47470047
+.long 0xdadada00, 0x00b5b5b5, 0x6d006d6d, 0x3d3d003d
+.long 0x09090900, 0x00121212, 0x84008484, 0x01010001
+.long 0x3f3f3f00, 0x007e7e7e, 0x9f009f9f, 0xd6d600d6
+.long 0xdddddd00, 0x00bbbbbb, 0xee00eeee, 0x56560056
+.long 0x94949400, 0x00292929, 0x4a004a4a, 0x4d4d004d
+.long 0x87878700, 0x000f0f0f, 0xc300c3c3, 0x0d0d000d
+.long 0x5c5c5c00, 0x00b8b8b8, 0x2e002e2e, 0x66660066
+.long 0x83838300, 0x00070707, 0xc100c1c1, 0xcccc00cc
+.long 0x02020200, 0x00040404, 0x01000101, 0x2d2d002d
+.long 0xcdcdcd00, 0x009b9b9b, 0xe600e6e6, 0x12120012
+.long 0x4a4a4a00, 0x00949494, 0x25002525, 0x20200020
+.long 0x90909000, 0x00212121, 0x48004848, 0xb1b100b1
+.long 0x33333300, 0x00666666, 0x99009999, 0x99990099
+.long 0x73737300, 0x00e6e6e6, 0xb900b9b9, 0x4c4c004c
+.long 0x67676700, 0x00cecece, 0xb300b3b3, 0xc2c200c2
+.long 0xf6f6f600, 0x00ededed, 0x7b007b7b, 0x7e7e007e
+.long 0xf3f3f300, 0x00e7e7e7, 0xf900f9f9, 0x05050005
+.long 0x9d9d9d00, 0x003b3b3b, 0xce00cece, 0xb7b700b7
+.long 0x7f7f7f00, 0x00fefefe, 0xbf00bfbf, 0x31310031
+.long 0xbfbfbf00, 0x007f7f7f, 0xdf00dfdf, 0x17170017
+.long 0xe2e2e200, 0x00c5c5c5, 0x71007171, 0xd7d700d7
+.long 0x52525200, 0x00a4a4a4, 0x29002929, 0x58580058
+.long 0x9b9b9b00, 0x00373737, 0xcd00cdcd, 0x61610061
+.long 0xd8d8d800, 0x00b1b1b1, 0x6c006c6c, 0x1b1b001b
+.long 0x26262600, 0x004c4c4c, 0x13001313, 0x1c1c001c
+.long 0xc8c8c800, 0x00919191, 0x64006464, 0x0f0f000f
+.long 0x37373700, 0x006e6e6e, 0x9b009b9b, 0x16160016
+.long 0xc6c6c600, 0x008d8d8d, 0x63006363, 0x18180018
+.long 0x3b3b3b00, 0x00767676, 0x9d009d9d, 0x22220022
+.long 0x81818100, 0x00030303, 0xc000c0c0, 0x44440044
+.long 0x96969600, 0x002d2d2d, 0x4b004b4b, 0xb2b200b2
+.long 0x6f6f6f00, 0x00dedede, 0xb700b7b7, 0xb5b500b5
+.long 0x4b4b4b00, 0x00969696, 0xa500a5a5, 0x91910091
+.long 0x13131300, 0x00262626, 0x89008989, 0x08080008
+.long 0xbebebe00, 0x007d7d7d, 0x5f005f5f, 0xa8a800a8
+.long 0x63636300, 0x00c6c6c6, 0xb100b1b1, 0xfcfc00fc
+.long 0x2e2e2e00, 0x005c5c5c, 0x17001717, 0x50500050
+.long 0xe9e9e900, 0x00d3d3d3, 0xf400f4f4, 0xd0d000d0
+.long 0x79797900, 0x00f2f2f2, 0xbc00bcbc, 0x7d7d007d
+.long 0xa7a7a700, 0x004f4f4f, 0xd300d3d3, 0x89890089
+.long 0x8c8c8c00, 0x00191919, 0x46004646, 0x97970097
+.long 0x9f9f9f00, 0x003f3f3f, 0xcf00cfcf, 0x5b5b005b
+.long 0x6e6e6e00, 0x00dcdcdc, 0x37003737, 0x95950095
+.long 0xbcbcbc00, 0x00797979, 0x5e005e5e, 0xffff00ff
+.long 0x8e8e8e00, 0x001d1d1d, 0x47004747, 0xd2d200d2
+.long 0x29292900, 0x00525252, 0x94009494, 0xc4c400c4
+.long 0xf5f5f500, 0x00ebebeb, 0xfa00fafa, 0x48480048
+.long 0xf9f9f900, 0x00f3f3f3, 0xfc00fcfc, 0xf7f700f7
+.long 0xb6b6b600, 0x006d6d6d, 0x5b005b5b, 0xdbdb00db
+.long 0x2f2f2f00, 0x005e5e5e, 0x97009797, 0x03030003
+.long 0xfdfdfd00, 0x00fbfbfb, 0xfe00fefe, 0xdada00da
+.long 0xb4b4b400, 0x00696969, 0x5a005a5a, 0x3f3f003f
+.long 0x59595900, 0x00b2b2b2, 0xac00acac, 0x94940094
+.long 0x78787800, 0x00f0f0f0, 0x3c003c3c, 0x5c5c005c
+.long 0x98989800, 0x00313131, 0x4c004c4c, 0x02020002
+.long 0x06060600, 0x000c0c0c, 0x03000303, 0x4a4a004a
+.long 0x6a6a6a00, 0x00d4d4d4, 0x35003535, 0x33330033
+.long 0xe7e7e700, 0x00cfcfcf, 0xf300f3f3, 0x67670067
+.long 0x46464600, 0x008c8c8c, 0x23002323, 0xf3f300f3
+.long 0x71717100, 0x00e2e2e2, 0xb800b8b8, 0x7f7f007f
+.long 0xbababa00, 0x00757575, 0x5d005d5d, 0xe2e200e2
+.long 0xd4d4d400, 0x00a9a9a9, 0x6a006a6a, 0x9b9b009b
+.long 0x25252500, 0x004a4a4a, 0x92009292, 0x26260026
+.long 0xababab00, 0x00575757, 0xd500d5d5, 0x37370037
+.long 0x42424200, 0x00848484, 0x21002121, 0x3b3b003b
+.long 0x88888800, 0x00111111, 0x44004444, 0x96960096
+.long 0xa2a2a200, 0x00454545, 0x51005151, 0x4b4b004b
+.long 0x8d8d8d00, 0x001b1b1b, 0xc600c6c6, 0xbebe00be
+.long 0xfafafa00, 0x00f5f5f5, 0x7d007d7d, 0x2e2e002e
+.long 0x72727200, 0x00e4e4e4, 0x39003939, 0x79790079
+.long 0x07070700, 0x000e0e0e, 0x83008383, 0x8c8c008c
+.long 0xb9b9b900, 0x00737373, 0xdc00dcdc, 0x6e6e006e
+.long 0x55555500, 0x00aaaaaa, 0xaa00aaaa, 0x8e8e008e
+.long 0xf8f8f800, 0x00f1f1f1, 0x7c007c7c, 0xf5f500f5
+.long 0xeeeeee00, 0x00dddddd, 0x77007777, 0xb6b600b6
+.long 0xacacac00, 0x00595959, 0x56005656, 0xfdfd00fd
+.long 0x0a0a0a00, 0x00141414, 0x05000505, 0x59590059
+.long 0x36363600, 0x006c6c6c, 0x1b001b1b, 0x98980098
+.long 0x49494900, 0x00929292, 0xa400a4a4, 0x6a6a006a
+.long 0x2a2a2a00, 0x00545454, 0x15001515, 0x46460046
+.long 0x68686800, 0x00d0d0d0, 0x34003434, 0xbaba00ba
+.long 0x3c3c3c00, 0x00787878, 0x1e001e1e, 0x25250025
+.long 0x38383800, 0x00707070, 0x1c001c1c, 0x42420042
+.long 0xf1f1f100, 0x00e3e3e3, 0xf800f8f8, 0xa2a200a2
+.long 0xa4a4a400, 0x00494949, 0x52005252, 0xfafa00fa
+.long 0x40404000, 0x00808080, 0x20002020, 0x07070007
+.long 0x28282800, 0x00505050, 0x14001414, 0x55550055
+.long 0xd3d3d300, 0x00a7a7a7, 0xe900e9e9, 0xeeee00ee
+.long 0x7b7b7b00, 0x00f6f6f6, 0xbd00bdbd, 0x0a0a000a
+.long 0xbbbbbb00, 0x00777777, 0xdd00dddd, 0x49490049
+.long 0xc9c9c900, 0x00939393, 0xe400e4e4, 0x68680068
+.long 0x43434300, 0x00868686, 0xa100a1a1, 0x38380038
+.long 0xc1c1c100, 0x00838383, 0xe000e0e0, 0xa4a400a4
+.long 0x15151500, 0x002a2a2a, 0x8a008a8a, 0x28280028
+.long 0xe3e3e300, 0x00c7c7c7, 0xf100f1f1, 0x7b7b007b
+.long 0xadadad00, 0x005b5b5b, 0xd600d6d6, 0xc9c900c9
+.long 0xf4f4f400, 0x00e9e9e9, 0x7a007a7a, 0xc1c100c1
+.long 0x77777700, 0x00eeeeee, 0xbb00bbbb, 0xe3e300e3
+.long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4
+.long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7
+.long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e
+
+#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/
+#endif /*__ARMEL__*/
diff --git a/comm/third_party/libgcrypt/cipher/camellia-glue.c b/comm/third_party/libgcrypt/cipher/camellia-glue.c
new file mode 100644
index 0000000000..6577b6516a
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/camellia-glue.c
@@ -0,0 +1,1097 @@
+/* camellia-glue.c - Glue for the Camellia cipher
+ * Copyright (C) 2007 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/* I put all the libgcrypt-specific stuff in this file to keep the
+ camellia.c/camellia.h files exactly as provided by NTT. If they
+ update their code, this should make it easier to bring the changes
+ in. - dshaw
+
+ There is one small change which needs to be done: Include the
+ following code at the top of camellia.h: */
+#if 0
+
+/* To use Camellia with libraries it is often useful to keep the name
+ * space of the library clean. The following macro is thus useful:
+ *
+ * #define CAMELLIA_EXT_SYM_PREFIX foo_
+ *
+ * This prefixes all external symbols with "foo_".
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#ifdef CAMELLIA_EXT_SYM_PREFIX
+#define CAMELLIA_PREFIX1(x,y) x ## y
+#define CAMELLIA_PREFIX2(x,y) CAMELLIA_PREFIX1(x,y)
+#define CAMELLIA_PREFIX(x) CAMELLIA_PREFIX2(CAMELLIA_EXT_SYM_PREFIX,x)
+#define Camellia_Ekeygen CAMELLIA_PREFIX(Camellia_Ekeygen)
+#define Camellia_EncryptBlock CAMELLIA_PREFIX(Camellia_EncryptBlock)
+#define Camellia_DecryptBlock CAMELLIA_PREFIX(Camellia_DecryptBlock)
+#define camellia_decrypt128 CAMELLIA_PREFIX(camellia_decrypt128)
+#define camellia_decrypt256 CAMELLIA_PREFIX(camellia_decrypt256)
+#define camellia_encrypt128 CAMELLIA_PREFIX(camellia_encrypt128)
+#define camellia_encrypt256 CAMELLIA_PREFIX(camellia_encrypt256)
+#define camellia_setup128 CAMELLIA_PREFIX(camellia_setup128)
+#define camellia_setup192 CAMELLIA_PREFIX(camellia_setup192)
+#define camellia_setup256 CAMELLIA_PREFIX(camellia_setup256)
+#endif /*CAMELLIA_EXT_SYM_PREFIX*/
+
+#endif /* Code sample. */
+
+
+#include <config.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "camellia.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+/* Helper macro to force alignment to 16 bytes. */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_16 __attribute__ ((aligned (16)))
+#else
+# define ATTR_ALIGNED_16
+#endif
+
+/* USE_AESNI_AVX indicates whether to compile with Intel AES-NI/AVX code. */
+#undef USE_AESNI_AVX
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AESNI_AVX 1
+# endif
+#endif
+
+/* USE_AESNI_AVX2 indicates whether to compile with Intel AES-NI/AVX2 code. */
+#undef USE_AESNI_AVX2
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AESNI_AVX2 1
+# endif
+#endif
+
+typedef struct
+{
+ KEY_TABLE_TYPE keytable;
+ int keybitlength;
+#ifdef USE_AESNI_AVX
+ unsigned int use_aesni_avx:1; /* AES-NI/AVX implementation shall be used. */
+#endif /*USE_AESNI_AVX*/
+#ifdef USE_AESNI_AVX2
+ unsigned int use_aesni_avx2:1;/* AES-NI/AVX2 implementation shall be used. */
+#endif /*USE_AESNI_AVX2*/
+} CAMELLIA_context;
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+#ifdef USE_AESNI_AVX
+/* Assembler implementations of Camellia using AES-NI and AVX.  These process
+   data in 16 blocks at a time.
+ */
+extern void _gcry_camellia_aesni_avx_ctr_enc(CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_cbc_dec(CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_cfb_dec(CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ocb_enc(CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ocb_dec(CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ocb_auth(CAMELLIA_context *ctx,
+ const unsigned char *abuf,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx,
+ const unsigned char *key,
+ unsigned int keylen) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_AESNI_AVX2
+/* Assembler implementations of Camellia using AES-NI and AVX2.  These process
+   data in 32 blocks at a time.
+ */
+extern void _gcry_camellia_aesni_avx2_ctr_enc(CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_cbc_dec(CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_cfb_dec(CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_ocb_enc(CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[32]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_ocb_dec(CAMELLIA_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[32]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_ocb_auth(CAMELLIA_context *ctx,
+ const unsigned char *abuf,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[32]) ASM_FUNC_ABI;
+#endif
+
+static const char *selftest(void);
+
+static void _gcry_camellia_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static void _gcry_camellia_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static void _gcry_camellia_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static size_t _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
+static size_t _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks);
+
+static gcry_err_code_t
+camellia_setkey(void *c, const byte *key, unsigned keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ CAMELLIA_context *ctx=c;
+ static int initialized=0;
+ static const char *selftest_failed=NULL;
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+ unsigned int hwf = _gcry_get_hw_features ();
+#endif
+
+ if(keylen!=16 && keylen!=24 && keylen!=32)
+ return GPG_ERR_INV_KEYLEN;
+
+ if(!initialized)
+ {
+ initialized=1;
+ selftest_failed=selftest();
+ if(selftest_failed)
+ log_error("%s\n",selftest_failed);
+ }
+
+ if(selftest_failed)
+ return GPG_ERR_SELFTEST_FAILED;
+
+#ifdef USE_AESNI_AVX
+ ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
+#endif
+#ifdef USE_AESNI_AVX2
+ ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2);
+#endif
+
+ ctx->keybitlength=keylen*8;
+
+ /* Setup bulk encryption routines. */
+ memset (bulk_ops, 0, sizeof(*bulk_ops));
+ bulk_ops->cbc_dec = _gcry_camellia_cbc_dec;
+ bulk_ops->cfb_dec = _gcry_camellia_cfb_dec;
+ bulk_ops->ctr_enc = _gcry_camellia_ctr_enc;
+ bulk_ops->ocb_crypt = _gcry_camellia_ocb_crypt;
+ bulk_ops->ocb_auth = _gcry_camellia_ocb_auth;
+
+ if (0)
+ { }
+#ifdef USE_AESNI_AVX
+ else if (ctx->use_aesni_avx)
+ _gcry_camellia_aesni_avx_keygen(ctx, key, keylen);
+ else
+#endif
+ {
+ Camellia_Ekeygen(ctx->keybitlength,key,ctx->keytable);
+ _gcry_burn_stack
+ ((19+34+34)*sizeof(u32)+2*sizeof(void*) /* camellia_setup256 */
+ +(4+32)*sizeof(u32)+2*sizeof(void*) /* camellia_setup192 */
+ +0+sizeof(int)+2*sizeof(void*) /* Camellia_Ekeygen */
+ +3*2*sizeof(void*) /* Function calls. */
+ );
+ }
+
+ return 0;
+}
+
+#ifdef USE_ARM_ASM
+
+/* Assembly implementations of Camellia. */
+extern void _gcry_camellia_arm_encrypt_block(const KEY_TABLE_TYPE keyTable,
+ byte *outbuf, const byte *inbuf,
+ const int keybits);
+
+extern void _gcry_camellia_arm_decrypt_block(const KEY_TABLE_TYPE keyTable,
+ byte *outbuf, const byte *inbuf,
+ const int keybits);
+
+static void Camellia_EncryptBlock(const int keyBitLength,
+ const unsigned char *plaintext,
+ const KEY_TABLE_TYPE keyTable,
+ unsigned char *cipherText)
+{
+ _gcry_camellia_arm_encrypt_block(keyTable, cipherText, plaintext,
+ keyBitLength);
+}
+
+static void Camellia_DecryptBlock(const int keyBitLength,
+ const unsigned char *cipherText,
+ const KEY_TABLE_TYPE keyTable,
+ unsigned char *plaintext)
+{
+ _gcry_camellia_arm_decrypt_block(keyTable, plaintext, cipherText,
+ keyBitLength);
+}
+
+#ifdef __aarch64__
+# define CAMELLIA_encrypt_stack_burn_size (0)
+# define CAMELLIA_decrypt_stack_burn_size (0)
+#else
+# define CAMELLIA_encrypt_stack_burn_size (15*4)
+# define CAMELLIA_decrypt_stack_burn_size (15*4)
+#endif
+
+static unsigned int
+camellia_encrypt(void *c, byte *outbuf, const byte *inbuf)
+{
+ CAMELLIA_context *ctx = c;
+ Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
+ return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size);
+}
+
+static unsigned int
+camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
+{
+ CAMELLIA_context *ctx=c;
+ Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
+ return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size);
+}
+
+#else /*USE_ARM_ASM*/
+
+static unsigned int
+camellia_encrypt(void *c, byte *outbuf, const byte *inbuf)
+{
+ CAMELLIA_context *ctx=c;
+
+ Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
+
+#define CAMELLIA_encrypt_stack_burn_size \
+ (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
+ +4*sizeof(u32)+4*sizeof(u32) \
+ +2*sizeof(u32*)+4*sizeof(u32) \
+ +2*2*sizeof(void*) /* Function calls. */ \
+ )
+
+ return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size);
+}
+
+static unsigned int
+camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
+{
+ CAMELLIA_context *ctx=c;
+
+ Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
+
+#define CAMELLIA_decrypt_stack_burn_size \
+ (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
+ +4*sizeof(u32)+4*sizeof(u32) \
+ +2*sizeof(u32*)+4*sizeof(u32) \
+ +2*2*sizeof(void*) /* Function calls. */ \
+ )
+
+ return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size);
+}
+
+#endif /*!USE_ARM_ASM*/
+
+/* Bulk encryption of complete blocks in CTR mode. This function is only
+ intended for the bulk encryption feature of cipher.c. CTR is expected to be
+ of size CAMELLIA_BLOCK_SIZE. */
+static void
+_gcry_camellia_ctr_enc(void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ CAMELLIA_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char tmpbuf[CAMELLIA_BLOCK_SIZE];
+ int burn_stack_depth = CAMELLIA_encrypt_stack_burn_size;
+
+#ifdef USE_AESNI_AVX2
+ if (ctx->use_aesni_avx2)
+ {
+ int did_use_aesni_avx2 = 0;
+
+ /* Process data in 32 block chunks. */
+ while (nblocks >= 32)
+ {
+ _gcry_camellia_aesni_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 32;
+ outbuf += 32 * CAMELLIA_BLOCK_SIZE;
+ inbuf += 32 * CAMELLIA_BLOCK_SIZE;
+ did_use_aesni_avx2 = 1;
+ }
+
+ if (did_use_aesni_avx2)
+ {
+ int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+ if (burn_stack_depth < avx2_burn_stack_depth)
+ burn_stack_depth = avx2_burn_stack_depth;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ /* TODO: use caching instead? */
+ }
+#endif
+
+#ifdef USE_AESNI_AVX
+ if (ctx->use_aesni_avx)
+ {
+ int did_use_aesni_avx = 0;
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_camellia_aesni_avx_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 16;
+ outbuf += 16 * CAMELLIA_BLOCK_SIZE;
+ inbuf += 16 * CAMELLIA_BLOCK_SIZE;
+ did_use_aesni_avx = 1;
+ }
+
+ if (did_use_aesni_avx)
+ {
+ int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+ if (burn_stack_depth < avx_burn_stack_depth)
+ burn_stack_depth = avx_burn_stack_depth;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ /* TODO: use caching instead? */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* Encrypt the counter. */
+ Camellia_EncryptBlock(ctx->keybitlength, ctr, ctx->keytable, tmpbuf);
+ /* XOR the input with the encrypted counter and store in output. */
+ cipher_block_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE);
+ outbuf += CAMELLIA_BLOCK_SIZE;
+ inbuf += CAMELLIA_BLOCK_SIZE;
+ /* Increment the counter. */
+ cipher_block_add(ctr, 1, CAMELLIA_BLOCK_SIZE);
+ }
+
+ wipememory(tmpbuf, sizeof(tmpbuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CBC mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_camellia_cbc_dec(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ CAMELLIA_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char savebuf[CAMELLIA_BLOCK_SIZE];
+ int burn_stack_depth = CAMELLIA_decrypt_stack_burn_size;
+
+#ifdef USE_AESNI_AVX2
+ if (ctx->use_aesni_avx2)
+ {
+ int did_use_aesni_avx2 = 0;
+
+ /* Process data in 32 block chunks. */
+ while (nblocks >= 32)
+ {
+ _gcry_camellia_aesni_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 32;
+ outbuf += 32 * CAMELLIA_BLOCK_SIZE;
+ inbuf += 32 * CAMELLIA_BLOCK_SIZE;
+ did_use_aesni_avx2 = 1;
+ }
+
+ if (did_use_aesni_avx2)
+ {
+ int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
+	      2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+ if (burn_stack_depth < avx2_burn_stack_depth)
+ burn_stack_depth = avx2_burn_stack_depth;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_AESNI_AVX
+ if (ctx->use_aesni_avx)
+ {
+ int did_use_aesni_avx = 0;
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_camellia_aesni_avx_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 16;
+ outbuf += 16 * CAMELLIA_BLOCK_SIZE;
+ inbuf += 16 * CAMELLIA_BLOCK_SIZE;
+ did_use_aesni_avx = 1;
+ }
+
+ if (did_use_aesni_avx)
+ {
+ int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+ if (burn_stack_depth < avx_burn_stack_depth)
+ burn_stack_depth = avx_burn_stack_depth;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* INBUF is needed later and it may be identical to OUTBUF, so store
+ the intermediate result to SAVEBUF. */
+ Camellia_DecryptBlock(ctx->keybitlength, inbuf, ctx->keytable, savebuf);
+
+ cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf,
+ CAMELLIA_BLOCK_SIZE);
+ inbuf += CAMELLIA_BLOCK_SIZE;
+ outbuf += CAMELLIA_BLOCK_SIZE;
+ }
+
+ wipememory(savebuf, sizeof(savebuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CFB mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_camellia_cfb_dec(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ CAMELLIA_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ int burn_stack_depth = CAMELLIA_decrypt_stack_burn_size;
+
+#ifdef USE_AESNI_AVX2
+ if (ctx->use_aesni_avx2)
+ {
+ int did_use_aesni_avx2 = 0;
+
+ /* Process data in 32 block chunks. */
+ while (nblocks >= 32)
+ {
+ _gcry_camellia_aesni_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 32;
+ outbuf += 32 * CAMELLIA_BLOCK_SIZE;
+ inbuf += 32 * CAMELLIA_BLOCK_SIZE;
+ did_use_aesni_avx2 = 1;
+ }
+
+ if (did_use_aesni_avx2)
+ {
+ int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+ if (burn_stack_depth < avx2_burn_stack_depth)
+ burn_stack_depth = avx2_burn_stack_depth;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_AESNI_AVX
+ if (ctx->use_aesni_avx)
+ {
+ int did_use_aesni_avx = 0;
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_camellia_aesni_avx_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 16;
+ outbuf += 16 * CAMELLIA_BLOCK_SIZE;
+ inbuf += 16 * CAMELLIA_BLOCK_SIZE;
+ did_use_aesni_avx = 1;
+ }
+
+ if (did_use_aesni_avx)
+ {
+ int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+ if (burn_stack_depth < avx_burn_stack_depth)
+ burn_stack_depth = avx_burn_stack_depth;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ Camellia_EncryptBlock(ctx->keybitlength, iv, ctx->keytable, iv);
+ cipher_block_xor_n_copy(outbuf, iv, inbuf, CAMELLIA_BLOCK_SIZE);
+ outbuf += CAMELLIA_BLOCK_SIZE;
+ inbuf += CAMELLIA_BLOCK_SIZE;
+ }
+
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk encryption/decryption of complete blocks in OCB mode. */
+static size_t
+_gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+ CAMELLIA_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ int burn_stack_depth;
+ u64 blkn = c->u_mode.ocb.data_nblocks;
+
+ burn_stack_depth = encrypt ? CAMELLIA_encrypt_stack_burn_size :
+ CAMELLIA_decrypt_stack_burn_size;
+#else
+ (void)c;
+ (void)outbuf_arg;
+ (void)inbuf_arg;
+ (void)encrypt;
+#endif
+
+#ifdef USE_AESNI_AVX2
+ if (ctx->use_aesni_avx2)
+ {
+ int did_use_aesni_avx2 = 0;
+ u64 Ls[32];
+ unsigned int n = 32 - (blkn % 32);
+ u64 *l;
+ int i;
+
+ if (nblocks >= 32)
+ {
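+	  /* Explanatory note: the Ls table mirrors the OCB offset schedule,
+	     where block number i uses L[ntz(i)]; for 32 consecutive blocks
+	     this gives the pattern L[0],L[1],L[0],L[2],L[0],L[1],L[0],L[3],...
+	     The slot for the last block of each chunk is filled in from
+	     ocb_get_l() inside the processing loop below. */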
+ for (i = 0; i < 32; i += 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ }
+
+ Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
+ Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ l = &Ls[(31 + n) % 32];
+
+ /* Process data in 32 block chunks. */
+ while (nblocks >= 32)
+ {
+ blkn += 32;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);
+
+ if (encrypt)
+ _gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+ else
+ _gcry_camellia_aesni_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+
+ nblocks -= 32;
+ outbuf += 32 * CAMELLIA_BLOCK_SIZE;
+ inbuf += 32 * CAMELLIA_BLOCK_SIZE;
+ did_use_aesni_avx2 = 1;
+ }
+ }
+
+ if (did_use_aesni_avx2)
+ {
+ int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE +
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+ if (burn_stack_depth < avx2_burn_stack_depth)
+ burn_stack_depth = avx2_burn_stack_depth;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_AESNI_AVX
+ if (ctx->use_aesni_avx)
+ {
+ int did_use_aesni_avx = 0;
+ u64 Ls[16];
+ unsigned int n = 16 - (blkn % 16);
+ u64 *l;
+ int i;
+
+ if (nblocks >= 16)
+ {
+ for (i = 0; i < 16; i += 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ }
+
+ Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ l = &Ls[(15 + n) % 16];
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ blkn += 16;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+ if (encrypt)
+ _gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+ else
+ _gcry_camellia_aesni_avx_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+
+ nblocks -= 16;
+ outbuf += 16 * CAMELLIA_BLOCK_SIZE;
+ inbuf += 16 * CAMELLIA_BLOCK_SIZE;
+ did_use_aesni_avx = 1;
+ }
+ }
+
+ if (did_use_aesni_avx)
+ {
+ int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+ if (burn_stack_depth < avx_burn_stack_depth)
+ burn_stack_depth = avx_burn_stack_depth;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+ c->u_mode.ocb.data_nblocks = blkn;
+
+ if (burn_stack_depth)
+ _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#endif
+
+ return nblocks;
+}
+
+/* Bulk authentication of complete blocks in OCB mode. */
+static size_t
+_gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks)
+{
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+ CAMELLIA_context *ctx = (void *)&c->context.c;
+ const unsigned char *abuf = abuf_arg;
+ int burn_stack_depth;
+ u64 blkn = c->u_mode.ocb.aad_nblocks;
+
+ burn_stack_depth = CAMELLIA_encrypt_stack_burn_size;
+#else
+ (void)c;
+ (void)abuf_arg;
+#endif
+
+#ifdef USE_AESNI_AVX2
+ if (ctx->use_aesni_avx2)
+ {
+ int did_use_aesni_avx2 = 0;
+ u64 Ls[32];
+ unsigned int n = 32 - (blkn % 32);
+ u64 *l;
+ int i;
+
+ if (nblocks >= 32)
+ {
+ for (i = 0; i < 32; i += 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ }
+
+ Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
+ Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ l = &Ls[(31 + n) % 32];
+
+ /* Process data in 32 block chunks. */
+ while (nblocks >= 32)
+ {
+ blkn += 32;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);
+
+ _gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf,
+ c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum, Ls);
+
+ nblocks -= 32;
+ abuf += 32 * CAMELLIA_BLOCK_SIZE;
+ did_use_aesni_avx2 = 1;
+ }
+ }
+
+ if (did_use_aesni_avx2)
+ {
+ int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE +
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+ if (burn_stack_depth < avx2_burn_stack_depth)
+ burn_stack_depth = avx2_burn_stack_depth;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_AESNI_AVX
+ if (ctx->use_aesni_avx)
+ {
+ int did_use_aesni_avx = 0;
+ u64 Ls[16];
+ unsigned int n = 16 - (blkn % 16);
+ u64 *l;
+ int i;
+
+ if (nblocks >= 16)
+ {
+ for (i = 0; i < 16; i += 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ }
+
+ Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ l = &Ls[(15 + n) % 16];
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ blkn += 16;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+ _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf,
+ c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum, Ls);
+
+ nblocks -= 16;
+ abuf += 16 * CAMELLIA_BLOCK_SIZE;
+ did_use_aesni_avx = 1;
+ }
+ }
+
+ if (did_use_aesni_avx)
+ {
+ int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+ if (burn_stack_depth < avx_burn_stack_depth)
+ burn_stack_depth = avx_burn_stack_depth;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+ c->u_mode.ocb.aad_nblocks = blkn;
+
+ if (burn_stack_depth)
+ _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#endif
+
+ return nblocks;
+}
+
+/* Run the self-tests for CAMELLIA-CTR-128; this tests the IV increment of
+   bulk CTR encryption.  Returns NULL on success. */
+static const char*
+selftest_ctr_128 (void)
+{
+ const int nblocks = 32+16+1;
+ const int blocksize = CAMELLIA_BLOCK_SIZE;
+ const int context_size = sizeof(CAMELLIA_context);
+
+ return _gcry_selftest_helper_ctr("CAMELLIA", &camellia_setkey,
+ &camellia_encrypt, nblocks, blocksize, context_size);
+}
+
+/* Run the self-tests for CAMELLIA-CBC-128; this tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cbc_128 (void)
+{
+ const int nblocks = 32+16+2;
+ const int blocksize = CAMELLIA_BLOCK_SIZE;
+ const int context_size = sizeof(CAMELLIA_context);
+
+ return _gcry_selftest_helper_cbc("CAMELLIA", &camellia_setkey,
+ &camellia_encrypt, nblocks, blocksize, context_size);
+}
+
+/* Run the self-tests for CAMELLIA-CFB-128; this tests bulk CFB decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cfb_128 (void)
+{
+ const int nblocks = 32+16+2;
+ const int blocksize = CAMELLIA_BLOCK_SIZE;
+ const int context_size = sizeof(CAMELLIA_context);
+
+ return _gcry_selftest_helper_cfb("CAMELLIA", &camellia_setkey,
+ &camellia_encrypt, nblocks, blocksize, context_size);
+}
+
+static const char *
+selftest(void)
+{
+ CAMELLIA_context ctx;
+ byte scratch[16];
+ cipher_bulk_ops_t bulk_ops;
+ const char *r;
+
+ /* These test vectors are from RFC-3713 */
+ static const byte plaintext[]=
+ {
+ 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
+ 0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10
+ };
+ static const byte key_128[]=
+ {
+ 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
+ 0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10
+ };
+ static const byte ciphertext_128[]=
+ {
+ 0x67,0x67,0x31,0x38,0x54,0x96,0x69,0x73,
+ 0x08,0x57,0x06,0x56,0x48,0xea,0xbe,0x43
+ };
+ static const byte key_192[]=
+ {
+ 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,0x98,
+ 0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77
+ };
+ static const byte ciphertext_192[]=
+ {
+ 0xb4,0x99,0x34,0x01,0xb3,0xe9,0x96,0xf8,
+ 0x4e,0xe5,0xce,0xe7,0xd7,0x9b,0x09,0xb9
+ };
+ static const byte key_256[]=
+ {
+ 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,
+ 0x98,0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,
+ 0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff
+ };
+ static const byte ciphertext_256[]=
+ {
+ 0x9a,0xcc,0x23,0x7d,0xff,0x16,0xd7,0x6c,
+ 0x20,0xef,0x7c,0x91,0x9e,0x3a,0x75,0x09
+ };
+
+ camellia_setkey(&ctx,key_128,sizeof(key_128),&bulk_ops);
+ camellia_encrypt(&ctx,scratch,plaintext);
+ if(memcmp(scratch,ciphertext_128,sizeof(ciphertext_128))!=0)
+ return "CAMELLIA-128 test encryption failed.";
+ camellia_decrypt(&ctx,scratch,scratch);
+ if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
+ return "CAMELLIA-128 test decryption failed.";
+
+ camellia_setkey(&ctx,key_192,sizeof(key_192),&bulk_ops);
+ camellia_encrypt(&ctx,scratch,plaintext);
+ if(memcmp(scratch,ciphertext_192,sizeof(ciphertext_192))!=0)
+ return "CAMELLIA-192 test encryption failed.";
+ camellia_decrypt(&ctx,scratch,scratch);
+ if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
+ return "CAMELLIA-192 test decryption failed.";
+
+ camellia_setkey(&ctx,key_256,sizeof(key_256),&bulk_ops);
+ camellia_encrypt(&ctx,scratch,plaintext);
+ if(memcmp(scratch,ciphertext_256,sizeof(ciphertext_256))!=0)
+ return "CAMELLIA-256 test encryption failed.";
+ camellia_decrypt(&ctx,scratch,scratch);
+ if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
+ return "CAMELLIA-256 test decryption failed.";
+
+ if ( (r = selftest_ctr_128 ()) )
+ return r;
+
+ if ( (r = selftest_cbc_128 ()) )
+ return r;
+
+ if ( (r = selftest_cfb_128 ()) )
+ return r;
+
+ return NULL;
+}
+
+/* These oids are from
+ <http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications_oid.html>,
+ retrieved May 1, 2007. */
+
+static gcry_cipher_oid_spec_t camellia128_oids[] =
+ {
+ {"1.2.392.200011.61.1.1.1.2", GCRY_CIPHER_MODE_CBC},
+ {"0.3.4401.5.3.1.9.1", GCRY_CIPHER_MODE_ECB},
+ {"0.3.4401.5.3.1.9.3", GCRY_CIPHER_MODE_OFB},
+ {"0.3.4401.5.3.1.9.4", GCRY_CIPHER_MODE_CFB},
+ { NULL }
+ };
+
+static gcry_cipher_oid_spec_t camellia192_oids[] =
+ {
+ {"1.2.392.200011.61.1.1.1.3", GCRY_CIPHER_MODE_CBC},
+ {"0.3.4401.5.3.1.9.21", GCRY_CIPHER_MODE_ECB},
+ {"0.3.4401.5.3.1.9.23", GCRY_CIPHER_MODE_OFB},
+ {"0.3.4401.5.3.1.9.24", GCRY_CIPHER_MODE_CFB},
+ { NULL }
+ };
+
+static gcry_cipher_oid_spec_t camellia256_oids[] =
+ {
+ {"1.2.392.200011.61.1.1.1.4", GCRY_CIPHER_MODE_CBC},
+ {"0.3.4401.5.3.1.9.41", GCRY_CIPHER_MODE_ECB},
+ {"0.3.4401.5.3.1.9.43", GCRY_CIPHER_MODE_OFB},
+ {"0.3.4401.5.3.1.9.44", GCRY_CIPHER_MODE_CFB},
+ { NULL }
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_camellia128 =
+ {
+ GCRY_CIPHER_CAMELLIA128, {0, 0},
+ "CAMELLIA128",NULL,camellia128_oids,CAMELLIA_BLOCK_SIZE,128,
+ sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_camellia192 =
+ {
+ GCRY_CIPHER_CAMELLIA192, {0, 0},
+ "CAMELLIA192",NULL,camellia192_oids,CAMELLIA_BLOCK_SIZE,192,
+ sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_camellia256 =
+ {
+ GCRY_CIPHER_CAMELLIA256, {0, 0},
+ "CAMELLIA256",NULL,camellia256_oids,CAMELLIA_BLOCK_SIZE,256,
+ sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
+ };
diff --git a/comm/third_party/libgcrypt/cipher/camellia.c b/comm/third_party/libgcrypt/cipher/camellia.c
new file mode 100644
index 0000000000..e7085a7ec8
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/camellia.c
@@ -0,0 +1,1413 @@
+/* camellia.c ver 1.2.0
+ *
+ * Copyright (C) 2006,2007
+ * NTT (Nippon Telegraph and Telephone Corporation).
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Algorithm Specification
+ * http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html
+ */
+
+#include <config.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "types.h"
+#include "bufhelp.h"
+#include "camellia.h"
+
+typedef byte u8;
+
+/* key constants */
+
+#define CAMELLIA_SIGMA1L (0xA09E667FL)
+#define CAMELLIA_SIGMA1R (0x3BCC908BL)
+#define CAMELLIA_SIGMA2L (0xB67AE858L)
+#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
+#define CAMELLIA_SIGMA3L (0xC6EF372FL)
+#define CAMELLIA_SIGMA3R (0xE94F82BEL)
+#define CAMELLIA_SIGMA4L (0x54FF53A5L)
+#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
+#define CAMELLIA_SIGMA5L (0x10E527FAL)
+#define CAMELLIA_SIGMA5R (0xDE682D1DL)
+#define CAMELLIA_SIGMA6L (0xB05688C2L)
+#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
+
+/*
+ * macros
+ */
+
+
+#if defined(_MSC_VER)
+
+# define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00)
+# define GETU32(p) SWAP(*((u32 *)(p)))
+# define PUTU32(ct, st) {*((u32 *)(ct)) = SWAP((st));}
+
+#else /* not MS-VC */
+
+# define GETU32(pt) buf_get_be32(pt)
+# define PUTU32(ct, st) buf_put_be32(ct, st)
+
+#endif
+
+#define CamelliaSubkeyL(INDEX) (subkey[(INDEX)*2])
+#define CamelliaSubkeyR(INDEX) (subkey[(INDEX)*2 + 1])
+
+/* rotate right by 1 byte */
+#define CAMELLIA_RR8(x) (((x) >> 8) + ((x) << 24))
+/* rotate left by 1 bit */
+#define CAMELLIA_RL1(x) (((x) << 1) + ((x) >> 31))
+/* rotate left by 1 byte */
+#define CAMELLIA_RL8(x) (((x) << 8) + ((x) >> 24))
+
+#define CAMELLIA_ROLDQ(ll, lr, rl, rr, w0, w1, bits) \
+ do { \
+ w0 = ll; \
+ ll = (ll << bits) + (lr >> (32 - bits)); \
+ lr = (lr << bits) + (rl >> (32 - bits)); \
+ rl = (rl << bits) + (rr >> (32 - bits)); \
+ rr = (rr << bits) + (w0 >> (32 - bits)); \
+ } while(0)
+
+#define CAMELLIA_ROLDQo32(ll, lr, rl, rr, w0, w1, bits) \
+ do { \
+ w0 = ll; \
+ w1 = lr; \
+ ll = (lr << (bits - 32)) + (rl >> (64 - bits)); \
+ lr = (rl << (bits - 32)) + (rr >> (64 - bits)); \
+ rl = (rr << (bits - 32)) + (w0 >> (64 - bits)); \
+ rr = (w0 << (bits - 32)) + (w1 >> (64 - bits)); \
+ } while(0)
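+/* Explanatory note: CAMELLIA_ROLDQ rotates the 128-bit quantity
+   (ll || lr || rl || rr) left by 'bits' (bits < 32); CAMELLIA_ROLDQo32 does
+   the same for rotation amounts of 32 bits or more.  For example, a rotation
+   by 15 moves the top 15 bits of the old 'll' into the low bits of 'rr'. */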
+
+#define CAMELLIA_SP1110(INDEX) (camellia_sp1110[(INDEX)])
+#define CAMELLIA_SP0222(INDEX) (camellia_sp0222[(INDEX)])
+#define CAMELLIA_SP3033(INDEX) (camellia_sp3033[(INDEX)])
+#define CAMELLIA_SP4404(INDEX) (camellia_sp4404[(INDEX)])
+
+#define CAMELLIA_F(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \
+ do { \
+ il = xl ^ kl; \
+ ir = xr ^ kr; \
+ t0 = il >> 16; \
+ t1 = ir >> 16; \
+ yl = CAMELLIA_SP1110(ir & 0xff) \
+ ^ CAMELLIA_SP0222((t1 >> 8) & 0xff) \
+ ^ CAMELLIA_SP3033(t1 & 0xff) \
+ ^ CAMELLIA_SP4404((ir >> 8) & 0xff); \
+ yr = CAMELLIA_SP1110((t0 >> 8) & 0xff) \
+ ^ CAMELLIA_SP0222(t0 & 0xff) \
+ ^ CAMELLIA_SP3033((il >> 8) & 0xff) \
+ ^ CAMELLIA_SP4404(il & 0xff); \
+ yl ^= yr; \
+ yr = CAMELLIA_RR8(yr); \
+ yr ^= yl; \
+ } while(0)
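+/* Explanatory note: CAMELLIA_F is the Camellia round function F(X, k): the
+   64-bit input (xl || xr) is XORed with the subkey (kl || kr), each byte is
+   substituted through the SP tables (which combine the s-boxes with the P
+   permutation), and the mixed result is left in (yl || yr). */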
+
+
+/*
+ * macros for speed-up
+ */
+#define CAMELLIA_FLS(ll, lr, rl, rr, kll, klr, krl, krr, t0, t1, t2, t3) \
+ do { \
+ t0 = kll; \
+ t0 &= ll; \
+ lr ^= CAMELLIA_RL1(t0); \
+ t1 = klr; \
+ t1 |= lr; \
+ ll ^= t1; \
+ \
+ t2 = krr; \
+ t2 |= rr; \
+ rl ^= t2; \
+ t3 = krl; \
+ t3 &= rl; \
+ rr ^= CAMELLIA_RL1(t3); \
+ } while(0)
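+/* Explanatory note: CAMELLIA_FLS applies the FL function to the left half
+   (ll || lr) and FL^-1 to the right half (rl || rr); these layers are
+   inserted every six rounds in the encryption/decryption code below. */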
+
+#define CAMELLIA_ROUNDSM(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \
+ do { \
+ yl ^= kl; \
+ yr ^= kr; \
+ ir = CAMELLIA_SP1110(xr & 0xff) \
+ ^ CAMELLIA_SP0222((xr >> 24) & 0xff) \
+ ^ CAMELLIA_SP3033((xr >> 16) & 0xff) \
+ ^ CAMELLIA_SP4404((xr >> 8) & 0xff); \
+ il = CAMELLIA_SP1110((xl >> 24) & 0xff) \
+ ^ CAMELLIA_SP0222((xl >> 16) & 0xff) \
+ ^ CAMELLIA_SP3033((xl >> 8) & 0xff) \
+ ^ CAMELLIA_SP4404(xl & 0xff); \
+ ir ^= il; \
+ il = CAMELLIA_RR8(il); \
+ il ^= ir; \
+ yl ^= ir; \
+ yr ^= il; \
+ } while(0)
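+/* Explanatory note: CAMELLIA_ROUNDSM is the round function with the subkey
+   XOR moved to the output side; this matches the "key XOR is end of
+   F-function" transformation applied while generating the subkeys. */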
+
+
+static const u32 camellia_sp1110[256] = {
+ 0x70707000,0x82828200,0x2c2c2c00,0xececec00,
+ 0xb3b3b300,0x27272700,0xc0c0c000,0xe5e5e500,
+ 0xe4e4e400,0x85858500,0x57575700,0x35353500,
+ 0xeaeaea00,0x0c0c0c00,0xaeaeae00,0x41414100,
+ 0x23232300,0xefefef00,0x6b6b6b00,0x93939300,
+ 0x45454500,0x19191900,0xa5a5a500,0x21212100,
+ 0xededed00,0x0e0e0e00,0x4f4f4f00,0x4e4e4e00,
+ 0x1d1d1d00,0x65656500,0x92929200,0xbdbdbd00,
+ 0x86868600,0xb8b8b800,0xafafaf00,0x8f8f8f00,
+ 0x7c7c7c00,0xebebeb00,0x1f1f1f00,0xcecece00,
+ 0x3e3e3e00,0x30303000,0xdcdcdc00,0x5f5f5f00,
+ 0x5e5e5e00,0xc5c5c500,0x0b0b0b00,0x1a1a1a00,
+ 0xa6a6a600,0xe1e1e100,0x39393900,0xcacaca00,
+ 0xd5d5d500,0x47474700,0x5d5d5d00,0x3d3d3d00,
+ 0xd9d9d900,0x01010100,0x5a5a5a00,0xd6d6d600,
+ 0x51515100,0x56565600,0x6c6c6c00,0x4d4d4d00,
+ 0x8b8b8b00,0x0d0d0d00,0x9a9a9a00,0x66666600,
+ 0xfbfbfb00,0xcccccc00,0xb0b0b000,0x2d2d2d00,
+ 0x74747400,0x12121200,0x2b2b2b00,0x20202000,
+ 0xf0f0f000,0xb1b1b100,0x84848400,0x99999900,
+ 0xdfdfdf00,0x4c4c4c00,0xcbcbcb00,0xc2c2c200,
+ 0x34343400,0x7e7e7e00,0x76767600,0x05050500,
+ 0x6d6d6d00,0xb7b7b700,0xa9a9a900,0x31313100,
+ 0xd1d1d100,0x17171700,0x04040400,0xd7d7d700,
+ 0x14141400,0x58585800,0x3a3a3a00,0x61616100,
+ 0xdedede00,0x1b1b1b00,0x11111100,0x1c1c1c00,
+ 0x32323200,0x0f0f0f00,0x9c9c9c00,0x16161600,
+ 0x53535300,0x18181800,0xf2f2f200,0x22222200,
+ 0xfefefe00,0x44444400,0xcfcfcf00,0xb2b2b200,
+ 0xc3c3c300,0xb5b5b500,0x7a7a7a00,0x91919100,
+ 0x24242400,0x08080800,0xe8e8e800,0xa8a8a800,
+ 0x60606000,0xfcfcfc00,0x69696900,0x50505000,
+ 0xaaaaaa00,0xd0d0d000,0xa0a0a000,0x7d7d7d00,
+ 0xa1a1a100,0x89898900,0x62626200,0x97979700,
+ 0x54545400,0x5b5b5b00,0x1e1e1e00,0x95959500,
+ 0xe0e0e000,0xffffff00,0x64646400,0xd2d2d200,
+ 0x10101000,0xc4c4c400,0x00000000,0x48484800,
+ 0xa3a3a300,0xf7f7f700,0x75757500,0xdbdbdb00,
+ 0x8a8a8a00,0x03030300,0xe6e6e600,0xdadada00,
+ 0x09090900,0x3f3f3f00,0xdddddd00,0x94949400,
+ 0x87878700,0x5c5c5c00,0x83838300,0x02020200,
+ 0xcdcdcd00,0x4a4a4a00,0x90909000,0x33333300,
+ 0x73737300,0x67676700,0xf6f6f600,0xf3f3f300,
+ 0x9d9d9d00,0x7f7f7f00,0xbfbfbf00,0xe2e2e200,
+ 0x52525200,0x9b9b9b00,0xd8d8d800,0x26262600,
+ 0xc8c8c800,0x37373700,0xc6c6c600,0x3b3b3b00,
+ 0x81818100,0x96969600,0x6f6f6f00,0x4b4b4b00,
+ 0x13131300,0xbebebe00,0x63636300,0x2e2e2e00,
+ 0xe9e9e900,0x79797900,0xa7a7a700,0x8c8c8c00,
+ 0x9f9f9f00,0x6e6e6e00,0xbcbcbc00,0x8e8e8e00,
+ 0x29292900,0xf5f5f500,0xf9f9f900,0xb6b6b600,
+ 0x2f2f2f00,0xfdfdfd00,0xb4b4b400,0x59595900,
+ 0x78787800,0x98989800,0x06060600,0x6a6a6a00,
+ 0xe7e7e700,0x46464600,0x71717100,0xbababa00,
+ 0xd4d4d400,0x25252500,0xababab00,0x42424200,
+ 0x88888800,0xa2a2a200,0x8d8d8d00,0xfafafa00,
+ 0x72727200,0x07070700,0xb9b9b900,0x55555500,
+ 0xf8f8f800,0xeeeeee00,0xacacac00,0x0a0a0a00,
+ 0x36363600,0x49494900,0x2a2a2a00,0x68686800,
+ 0x3c3c3c00,0x38383800,0xf1f1f100,0xa4a4a400,
+ 0x40404000,0x28282800,0xd3d3d300,0x7b7b7b00,
+ 0xbbbbbb00,0xc9c9c900,0x43434300,0xc1c1c100,
+ 0x15151500,0xe3e3e300,0xadadad00,0xf4f4f400,
+ 0x77777700,0xc7c7c700,0x80808000,0x9e9e9e00,
+};
+
+static const u32 camellia_sp0222[256] = {
+ 0x00e0e0e0,0x00050505,0x00585858,0x00d9d9d9,
+ 0x00676767,0x004e4e4e,0x00818181,0x00cbcbcb,
+ 0x00c9c9c9,0x000b0b0b,0x00aeaeae,0x006a6a6a,
+ 0x00d5d5d5,0x00181818,0x005d5d5d,0x00828282,
+ 0x00464646,0x00dfdfdf,0x00d6d6d6,0x00272727,
+ 0x008a8a8a,0x00323232,0x004b4b4b,0x00424242,
+ 0x00dbdbdb,0x001c1c1c,0x009e9e9e,0x009c9c9c,
+ 0x003a3a3a,0x00cacaca,0x00252525,0x007b7b7b,
+ 0x000d0d0d,0x00717171,0x005f5f5f,0x001f1f1f,
+ 0x00f8f8f8,0x00d7d7d7,0x003e3e3e,0x009d9d9d,
+ 0x007c7c7c,0x00606060,0x00b9b9b9,0x00bebebe,
+ 0x00bcbcbc,0x008b8b8b,0x00161616,0x00343434,
+ 0x004d4d4d,0x00c3c3c3,0x00727272,0x00959595,
+ 0x00ababab,0x008e8e8e,0x00bababa,0x007a7a7a,
+ 0x00b3b3b3,0x00020202,0x00b4b4b4,0x00adadad,
+ 0x00a2a2a2,0x00acacac,0x00d8d8d8,0x009a9a9a,
+ 0x00171717,0x001a1a1a,0x00353535,0x00cccccc,
+ 0x00f7f7f7,0x00999999,0x00616161,0x005a5a5a,
+ 0x00e8e8e8,0x00242424,0x00565656,0x00404040,
+ 0x00e1e1e1,0x00636363,0x00090909,0x00333333,
+ 0x00bfbfbf,0x00989898,0x00979797,0x00858585,
+ 0x00686868,0x00fcfcfc,0x00ececec,0x000a0a0a,
+ 0x00dadada,0x006f6f6f,0x00535353,0x00626262,
+ 0x00a3a3a3,0x002e2e2e,0x00080808,0x00afafaf,
+ 0x00282828,0x00b0b0b0,0x00747474,0x00c2c2c2,
+ 0x00bdbdbd,0x00363636,0x00222222,0x00383838,
+ 0x00646464,0x001e1e1e,0x00393939,0x002c2c2c,
+ 0x00a6a6a6,0x00303030,0x00e5e5e5,0x00444444,
+ 0x00fdfdfd,0x00888888,0x009f9f9f,0x00656565,
+ 0x00878787,0x006b6b6b,0x00f4f4f4,0x00232323,
+ 0x00484848,0x00101010,0x00d1d1d1,0x00515151,
+ 0x00c0c0c0,0x00f9f9f9,0x00d2d2d2,0x00a0a0a0,
+ 0x00555555,0x00a1a1a1,0x00414141,0x00fafafa,
+ 0x00434343,0x00131313,0x00c4c4c4,0x002f2f2f,
+ 0x00a8a8a8,0x00b6b6b6,0x003c3c3c,0x002b2b2b,
+ 0x00c1c1c1,0x00ffffff,0x00c8c8c8,0x00a5a5a5,
+ 0x00202020,0x00898989,0x00000000,0x00909090,
+ 0x00474747,0x00efefef,0x00eaeaea,0x00b7b7b7,
+ 0x00151515,0x00060606,0x00cdcdcd,0x00b5b5b5,
+ 0x00121212,0x007e7e7e,0x00bbbbbb,0x00292929,
+ 0x000f0f0f,0x00b8b8b8,0x00070707,0x00040404,
+ 0x009b9b9b,0x00949494,0x00212121,0x00666666,
+ 0x00e6e6e6,0x00cecece,0x00ededed,0x00e7e7e7,
+ 0x003b3b3b,0x00fefefe,0x007f7f7f,0x00c5c5c5,
+ 0x00a4a4a4,0x00373737,0x00b1b1b1,0x004c4c4c,
+ 0x00919191,0x006e6e6e,0x008d8d8d,0x00767676,
+ 0x00030303,0x002d2d2d,0x00dedede,0x00969696,
+ 0x00262626,0x007d7d7d,0x00c6c6c6,0x005c5c5c,
+ 0x00d3d3d3,0x00f2f2f2,0x004f4f4f,0x00191919,
+ 0x003f3f3f,0x00dcdcdc,0x00797979,0x001d1d1d,
+ 0x00525252,0x00ebebeb,0x00f3f3f3,0x006d6d6d,
+ 0x005e5e5e,0x00fbfbfb,0x00696969,0x00b2b2b2,
+ 0x00f0f0f0,0x00313131,0x000c0c0c,0x00d4d4d4,
+ 0x00cfcfcf,0x008c8c8c,0x00e2e2e2,0x00757575,
+ 0x00a9a9a9,0x004a4a4a,0x00575757,0x00848484,
+ 0x00111111,0x00454545,0x001b1b1b,0x00f5f5f5,
+ 0x00e4e4e4,0x000e0e0e,0x00737373,0x00aaaaaa,
+ 0x00f1f1f1,0x00dddddd,0x00595959,0x00141414,
+ 0x006c6c6c,0x00929292,0x00545454,0x00d0d0d0,
+ 0x00787878,0x00707070,0x00e3e3e3,0x00494949,
+ 0x00808080,0x00505050,0x00a7a7a7,0x00f6f6f6,
+ 0x00777777,0x00939393,0x00868686,0x00838383,
+ 0x002a2a2a,0x00c7c7c7,0x005b5b5b,0x00e9e9e9,
+ 0x00eeeeee,0x008f8f8f,0x00010101,0x003d3d3d,
+};
+
+static const u32 camellia_sp3033[256] = {
+ 0x38003838,0x41004141,0x16001616,0x76007676,
+ 0xd900d9d9,0x93009393,0x60006060,0xf200f2f2,
+ 0x72007272,0xc200c2c2,0xab00abab,0x9a009a9a,
+ 0x75007575,0x06000606,0x57005757,0xa000a0a0,
+ 0x91009191,0xf700f7f7,0xb500b5b5,0xc900c9c9,
+ 0xa200a2a2,0x8c008c8c,0xd200d2d2,0x90009090,
+ 0xf600f6f6,0x07000707,0xa700a7a7,0x27002727,
+ 0x8e008e8e,0xb200b2b2,0x49004949,0xde00dede,
+ 0x43004343,0x5c005c5c,0xd700d7d7,0xc700c7c7,
+ 0x3e003e3e,0xf500f5f5,0x8f008f8f,0x67006767,
+ 0x1f001f1f,0x18001818,0x6e006e6e,0xaf00afaf,
+ 0x2f002f2f,0xe200e2e2,0x85008585,0x0d000d0d,
+ 0x53005353,0xf000f0f0,0x9c009c9c,0x65006565,
+ 0xea00eaea,0xa300a3a3,0xae00aeae,0x9e009e9e,
+ 0xec00ecec,0x80008080,0x2d002d2d,0x6b006b6b,
+ 0xa800a8a8,0x2b002b2b,0x36003636,0xa600a6a6,
+ 0xc500c5c5,0x86008686,0x4d004d4d,0x33003333,
+ 0xfd00fdfd,0x66006666,0x58005858,0x96009696,
+ 0x3a003a3a,0x09000909,0x95009595,0x10001010,
+ 0x78007878,0xd800d8d8,0x42004242,0xcc00cccc,
+ 0xef00efef,0x26002626,0xe500e5e5,0x61006161,
+ 0x1a001a1a,0x3f003f3f,0x3b003b3b,0x82008282,
+ 0xb600b6b6,0xdb00dbdb,0xd400d4d4,0x98009898,
+ 0xe800e8e8,0x8b008b8b,0x02000202,0xeb00ebeb,
+ 0x0a000a0a,0x2c002c2c,0x1d001d1d,0xb000b0b0,
+ 0x6f006f6f,0x8d008d8d,0x88008888,0x0e000e0e,
+ 0x19001919,0x87008787,0x4e004e4e,0x0b000b0b,
+ 0xa900a9a9,0x0c000c0c,0x79007979,0x11001111,
+ 0x7f007f7f,0x22002222,0xe700e7e7,0x59005959,
+ 0xe100e1e1,0xda00dada,0x3d003d3d,0xc800c8c8,
+ 0x12001212,0x04000404,0x74007474,0x54005454,
+ 0x30003030,0x7e007e7e,0xb400b4b4,0x28002828,
+ 0x55005555,0x68006868,0x50005050,0xbe00bebe,
+ 0xd000d0d0,0xc400c4c4,0x31003131,0xcb00cbcb,
+ 0x2a002a2a,0xad00adad,0x0f000f0f,0xca00caca,
+ 0x70007070,0xff00ffff,0x32003232,0x69006969,
+ 0x08000808,0x62006262,0x00000000,0x24002424,
+ 0xd100d1d1,0xfb00fbfb,0xba00baba,0xed00eded,
+ 0x45004545,0x81008181,0x73007373,0x6d006d6d,
+ 0x84008484,0x9f009f9f,0xee00eeee,0x4a004a4a,
+ 0xc300c3c3,0x2e002e2e,0xc100c1c1,0x01000101,
+ 0xe600e6e6,0x25002525,0x48004848,0x99009999,
+ 0xb900b9b9,0xb300b3b3,0x7b007b7b,0xf900f9f9,
+ 0xce00cece,0xbf00bfbf,0xdf00dfdf,0x71007171,
+ 0x29002929,0xcd00cdcd,0x6c006c6c,0x13001313,
+ 0x64006464,0x9b009b9b,0x63006363,0x9d009d9d,
+ 0xc000c0c0,0x4b004b4b,0xb700b7b7,0xa500a5a5,
+ 0x89008989,0x5f005f5f,0xb100b1b1,0x17001717,
+ 0xf400f4f4,0xbc00bcbc,0xd300d3d3,0x46004646,
+ 0xcf00cfcf,0x37003737,0x5e005e5e,0x47004747,
+ 0x94009494,0xfa00fafa,0xfc00fcfc,0x5b005b5b,
+ 0x97009797,0xfe00fefe,0x5a005a5a,0xac00acac,
+ 0x3c003c3c,0x4c004c4c,0x03000303,0x35003535,
+ 0xf300f3f3,0x23002323,0xb800b8b8,0x5d005d5d,
+ 0x6a006a6a,0x92009292,0xd500d5d5,0x21002121,
+ 0x44004444,0x51005151,0xc600c6c6,0x7d007d7d,
+ 0x39003939,0x83008383,0xdc00dcdc,0xaa00aaaa,
+ 0x7c007c7c,0x77007777,0x56005656,0x05000505,
+ 0x1b001b1b,0xa400a4a4,0x15001515,0x34003434,
+ 0x1e001e1e,0x1c001c1c,0xf800f8f8,0x52005252,
+ 0x20002020,0x14001414,0xe900e9e9,0xbd00bdbd,
+ 0xdd00dddd,0xe400e4e4,0xa100a1a1,0xe000e0e0,
+ 0x8a008a8a,0xf100f1f1,0xd600d6d6,0x7a007a7a,
+ 0xbb00bbbb,0xe300e3e3,0x40004040,0x4f004f4f,
+};
+
+static const u32 camellia_sp4404[256] = {
+ 0x70700070,0x2c2c002c,0xb3b300b3,0xc0c000c0,
+ 0xe4e400e4,0x57570057,0xeaea00ea,0xaeae00ae,
+ 0x23230023,0x6b6b006b,0x45450045,0xa5a500a5,
+ 0xeded00ed,0x4f4f004f,0x1d1d001d,0x92920092,
+ 0x86860086,0xafaf00af,0x7c7c007c,0x1f1f001f,
+ 0x3e3e003e,0xdcdc00dc,0x5e5e005e,0x0b0b000b,
+ 0xa6a600a6,0x39390039,0xd5d500d5,0x5d5d005d,
+ 0xd9d900d9,0x5a5a005a,0x51510051,0x6c6c006c,
+ 0x8b8b008b,0x9a9a009a,0xfbfb00fb,0xb0b000b0,
+ 0x74740074,0x2b2b002b,0xf0f000f0,0x84840084,
+ 0xdfdf00df,0xcbcb00cb,0x34340034,0x76760076,
+ 0x6d6d006d,0xa9a900a9,0xd1d100d1,0x04040004,
+ 0x14140014,0x3a3a003a,0xdede00de,0x11110011,
+ 0x32320032,0x9c9c009c,0x53530053,0xf2f200f2,
+ 0xfefe00fe,0xcfcf00cf,0xc3c300c3,0x7a7a007a,
+ 0x24240024,0xe8e800e8,0x60600060,0x69690069,
+ 0xaaaa00aa,0xa0a000a0,0xa1a100a1,0x62620062,
+ 0x54540054,0x1e1e001e,0xe0e000e0,0x64640064,
+ 0x10100010,0x00000000,0xa3a300a3,0x75750075,
+ 0x8a8a008a,0xe6e600e6,0x09090009,0xdddd00dd,
+ 0x87870087,0x83830083,0xcdcd00cd,0x90900090,
+ 0x73730073,0xf6f600f6,0x9d9d009d,0xbfbf00bf,
+ 0x52520052,0xd8d800d8,0xc8c800c8,0xc6c600c6,
+ 0x81810081,0x6f6f006f,0x13130013,0x63630063,
+ 0xe9e900e9,0xa7a700a7,0x9f9f009f,0xbcbc00bc,
+ 0x29290029,0xf9f900f9,0x2f2f002f,0xb4b400b4,
+ 0x78780078,0x06060006,0xe7e700e7,0x71710071,
+ 0xd4d400d4,0xabab00ab,0x88880088,0x8d8d008d,
+ 0x72720072,0xb9b900b9,0xf8f800f8,0xacac00ac,
+ 0x36360036,0x2a2a002a,0x3c3c003c,0xf1f100f1,
+ 0x40400040,0xd3d300d3,0xbbbb00bb,0x43430043,
+ 0x15150015,0xadad00ad,0x77770077,0x80800080,
+ 0x82820082,0xecec00ec,0x27270027,0xe5e500e5,
+ 0x85850085,0x35350035,0x0c0c000c,0x41410041,
+ 0xefef00ef,0x93930093,0x19190019,0x21210021,
+ 0x0e0e000e,0x4e4e004e,0x65650065,0xbdbd00bd,
+ 0xb8b800b8,0x8f8f008f,0xebeb00eb,0xcece00ce,
+ 0x30300030,0x5f5f005f,0xc5c500c5,0x1a1a001a,
+ 0xe1e100e1,0xcaca00ca,0x47470047,0x3d3d003d,
+ 0x01010001,0xd6d600d6,0x56560056,0x4d4d004d,
+ 0x0d0d000d,0x66660066,0xcccc00cc,0x2d2d002d,
+ 0x12120012,0x20200020,0xb1b100b1,0x99990099,
+ 0x4c4c004c,0xc2c200c2,0x7e7e007e,0x05050005,
+ 0xb7b700b7,0x31310031,0x17170017,0xd7d700d7,
+ 0x58580058,0x61610061,0x1b1b001b,0x1c1c001c,
+ 0x0f0f000f,0x16160016,0x18180018,0x22220022,
+ 0x44440044,0xb2b200b2,0xb5b500b5,0x91910091,
+ 0x08080008,0xa8a800a8,0xfcfc00fc,0x50500050,
+ 0xd0d000d0,0x7d7d007d,0x89890089,0x97970097,
+ 0x5b5b005b,0x95950095,0xffff00ff,0xd2d200d2,
+ 0xc4c400c4,0x48480048,0xf7f700f7,0xdbdb00db,
+ 0x03030003,0xdada00da,0x3f3f003f,0x94940094,
+ 0x5c5c005c,0x02020002,0x4a4a004a,0x33330033,
+ 0x67670067,0xf3f300f3,0x7f7f007f,0xe2e200e2,
+ 0x9b9b009b,0x26260026,0x37370037,0x3b3b003b,
+ 0x96960096,0x4b4b004b,0xbebe00be,0x2e2e002e,
+ 0x79790079,0x8c8c008c,0x6e6e006e,0x8e8e008e,
+ 0xf5f500f5,0xb6b600b6,0xfdfd00fd,0x59590059,
+ 0x98980098,0x6a6a006a,0x46460046,0xbaba00ba,
+ 0x25250025,0x42420042,0xa2a200a2,0xfafa00fa,
+ 0x07070007,0x55550055,0xeeee00ee,0x0a0a000a,
+ 0x49490049,0x68680068,0x38380038,0xa4a400a4,
+ 0x28280028,0x7b7b007b,0xc9c900c9,0xc1c100c1,
+ 0xe3e300e3,0xf4f400f4,0xc7c700c7,0x9e9e009e,
+};
+
+
+/**
+ * Stuff related to the Camellia key schedule
+ */
+#define subl(x) subL[(x)]
+#define subr(x) subR[(x)]
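+
+/* Explanatory note: subL[]/subR[] hold the raw subkey halves, indexed by
+   subkey number, while a key schedule is being computed; they are folded
+   into the final key table via CamelliaSubkeyL()/CamelliaSubkeyR() at the
+   end of each setup routine. */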
+
+void camellia_setup128(const unsigned char *key, u32 *subkey)
+{
+ u32 kll, klr, krl, krr;
+ u32 il, ir, t0, t1, w0, w1;
+ u32 kw4l, kw4r, dw, tl, tr;
+ u32 subL[26];
+ u32 subR[26];
+
+ /**
+   * k == kll || klr || krl || krr (|| is concatenation)
+ */
+ kll = GETU32(key );
+ klr = GETU32(key + 4);
+ krl = GETU32(key + 8);
+ krr = GETU32(key + 12);
+ /**
+ * generate KL dependent subkeys
+ */
+ subl(0) = kll; subr(0) = klr;
+ subl(1) = krl; subr(1) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(4) = kll; subr(4) = klr;
+ subl(5) = krl; subr(5) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30);
+ subl(10) = kll; subr(10) = klr;
+ subl(11) = krl; subr(11) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(13) = krl; subr(13) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(16) = kll; subr(16) = klr;
+ subl(17) = krl; subr(17) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(18) = kll; subr(18) = klr;
+ subl(19) = krl; subr(19) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(22) = kll; subr(22) = klr;
+ subl(23) = krl; subr(23) = krr;
+
+ /* generate KA */
+ kll = subl(0); klr = subr(0);
+ krl = subl(1); krr = subr(1);
+ CAMELLIA_F(kll, klr,
+ CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R,
+ w0, w1, il, ir, t0, t1);
+ krl ^= w0; krr ^= w1;
+ CAMELLIA_F(krl, krr,
+ CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R,
+ kll, klr, il, ir, t0, t1);
+ CAMELLIA_F(kll, klr,
+ CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R,
+ krl, krr, il, ir, t0, t1);
+ krl ^= w0; krr ^= w1;
+ CAMELLIA_F(krl, krr,
+ CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R,
+ w0, w1, il, ir, t0, t1);
+ kll ^= w0; klr ^= w1;
+
+ /* generate KA dependent subkeys */
+ subl(2) = kll; subr(2) = klr;
+ subl(3) = krl; subr(3) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(6) = kll; subr(6) = klr;
+ subl(7) = krl; subr(7) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(8) = kll; subr(8) = klr;
+ subl(9) = krl; subr(9) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(12) = kll; subr(12) = klr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(14) = kll; subr(14) = klr;
+ subl(15) = krl; subr(15) = krr;
+ CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34);
+ subl(20) = kll; subr(20) = klr;
+ subl(21) = krl; subr(21) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(24) = kll; subr(24) = klr;
+ subl(25) = krl; subr(25) = krr;
+
+
+ /* absorb kw2 to other subkeys */
+ subl(3) ^= subl(1); subr(3) ^= subr(1);
+ subl(5) ^= subl(1); subr(5) ^= subr(1);
+ subl(7) ^= subl(1); subr(7) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(9);
+ dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(11) ^= subl(1); subr(11) ^= subr(1);
+ subl(13) ^= subl(1); subr(13) ^= subr(1);
+ subl(15) ^= subl(1); subr(15) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(17);
+ dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(19) ^= subl(1); subr(19) ^= subr(1);
+ subl(21) ^= subl(1); subr(21) ^= subr(1);
+ subl(23) ^= subl(1); subr(23) ^= subr(1);
+ subl(24) ^= subl(1); subr(24) ^= subr(1);
+
+ /* absorb kw4 to other subkeys */
+ kw4l = subl(25); kw4r = subr(25);
+ subl(22) ^= kw4l; subr(22) ^= kw4r;
+ subl(20) ^= kw4l; subr(20) ^= kw4r;
+ subl(18) ^= kw4l; subr(18) ^= kw4r;
+ kw4l ^= kw4r & ~subr(16);
+ dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw);
+ subl(14) ^= kw4l; subr(14) ^= kw4r;
+ subl(12) ^= kw4l; subr(12) ^= kw4r;
+ subl(10) ^= kw4l; subr(10) ^= kw4r;
+ kw4l ^= kw4r & ~subr(8);
+ dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw);
+ subl(6) ^= kw4l; subr(6) ^= kw4r;
+ subl(4) ^= kw4l; subr(4) ^= kw4r;
+ subl(2) ^= kw4l; subr(2) ^= kw4r;
+ subl(0) ^= kw4l; subr(0) ^= kw4r;
+
+ /* key XOR is end of F-function */
+ CamelliaSubkeyL(0) = subl(0) ^ subl(2);
+ CamelliaSubkeyR(0) = subr(0) ^ subr(2);
+ CamelliaSubkeyL(2) = subl(3);
+ CamelliaSubkeyR(2) = subr(3);
+ CamelliaSubkeyL(3) = subl(2) ^ subl(4);
+ CamelliaSubkeyR(3) = subr(2) ^ subr(4);
+ CamelliaSubkeyL(4) = subl(3) ^ subl(5);
+ CamelliaSubkeyR(4) = subr(3) ^ subr(5);
+ CamelliaSubkeyL(5) = subl(4) ^ subl(6);
+ CamelliaSubkeyR(5) = subr(4) ^ subr(6);
+ CamelliaSubkeyL(6) = subl(5) ^ subl(7);
+ CamelliaSubkeyR(6) = subr(5) ^ subr(7);
+ tl = subl(10) ^ (subr(10) & ~subr(8));
+ dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(7) = subl(6) ^ tl;
+ CamelliaSubkeyR(7) = subr(6) ^ tr;
+ CamelliaSubkeyL(8) = subl(8);
+ CamelliaSubkeyR(8) = subr(8);
+ CamelliaSubkeyL(9) = subl(9);
+ CamelliaSubkeyR(9) = subr(9);
+ tl = subl(7) ^ (subr(7) & ~subr(9));
+ dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(10) = tl ^ subl(11);
+ CamelliaSubkeyR(10) = tr ^ subr(11);
+ CamelliaSubkeyL(11) = subl(10) ^ subl(12);
+ CamelliaSubkeyR(11) = subr(10) ^ subr(12);
+ CamelliaSubkeyL(12) = subl(11) ^ subl(13);
+ CamelliaSubkeyR(12) = subr(11) ^ subr(13);
+ CamelliaSubkeyL(13) = subl(12) ^ subl(14);
+ CamelliaSubkeyR(13) = subr(12) ^ subr(14);
+ CamelliaSubkeyL(14) = subl(13) ^ subl(15);
+ CamelliaSubkeyR(14) = subr(13) ^ subr(15);
+ tl = subl(18) ^ (subr(18) & ~subr(16));
+ dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(15) = subl(14) ^ tl;
+ CamelliaSubkeyR(15) = subr(14) ^ tr;
+ CamelliaSubkeyL(16) = subl(16);
+ CamelliaSubkeyR(16) = subr(16);
+ CamelliaSubkeyL(17) = subl(17);
+ CamelliaSubkeyR(17) = subr(17);
+ tl = subl(15) ^ (subr(15) & ~subr(17));
+ dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(18) = tl ^ subl(19);
+ CamelliaSubkeyR(18) = tr ^ subr(19);
+ CamelliaSubkeyL(19) = subl(18) ^ subl(20);
+ CamelliaSubkeyR(19) = subr(18) ^ subr(20);
+ CamelliaSubkeyL(20) = subl(19) ^ subl(21);
+ CamelliaSubkeyR(20) = subr(19) ^ subr(21);
+ CamelliaSubkeyL(21) = subl(20) ^ subl(22);
+ CamelliaSubkeyR(21) = subr(20) ^ subr(22);
+ CamelliaSubkeyL(22) = subl(21) ^ subl(23);
+ CamelliaSubkeyR(22) = subr(21) ^ subr(23);
+ CamelliaSubkeyL(23) = subl(22);
+ CamelliaSubkeyR(23) = subr(22);
+ CamelliaSubkeyL(24) = subl(24) ^ subl(23);
+ CamelliaSubkeyR(24) = subr(24) ^ subr(23);
+
+ return;
+}
+
+void camellia_setup256(const unsigned char *key, u32 *subkey)
+{
+ u32 kll,klr,krl,krr; /* left half of key */
+ u32 krll,krlr,krrl,krrr; /* right half of key */
+ u32 il, ir, t0, t1, w0, w1; /* temporary variables */
+ u32 kw4l, kw4r, dw, tl, tr;
+ u32 subL[34];
+ u32 subR[34];
+
+ /**
+ * key = (kll || klr || krl || krr || krll || krlr || krrl || krrr)
+   *  (|| is concatenation)
+ */
+
+ kll = GETU32(key );
+ klr = GETU32(key + 4);
+ krl = GETU32(key + 8);
+ krr = GETU32(key + 12);
+ krll = GETU32(key + 16);
+ krlr = GETU32(key + 20);
+ krrl = GETU32(key + 24);
+ krrr = GETU32(key + 28);
+
+ /* generate KL dependent subkeys */
+ subl(0) = kll; subr(0) = klr;
+ subl(1) = krl; subr(1) = krr;
+ CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 45);
+ subl(12) = kll; subr(12) = klr;
+ subl(13) = krl; subr(13) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(16) = kll; subr(16) = klr;
+ subl(17) = krl; subr(17) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(22) = kll; subr(22) = klr;
+ subl(23) = krl; subr(23) = krr;
+ CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34);
+ subl(30) = kll; subr(30) = klr;
+ subl(31) = krl; subr(31) = krr;
+
+ /* generate KR dependent subkeys */
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15);
+ subl(4) = krll; subr(4) = krlr;
+ subl(5) = krrl; subr(5) = krrr;
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15);
+ subl(8) = krll; subr(8) = krlr;
+ subl(9) = krrl; subr(9) = krrr;
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30);
+ subl(18) = krll; subr(18) = krlr;
+ subl(19) = krrl; subr(19) = krrr;
+ CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34);
+ subl(26) = krll; subr(26) = krlr;
+ subl(27) = krrl; subr(27) = krrr;
+ CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34);
+
+ /* generate KA */
+ kll = subl(0) ^ krll; klr = subr(0) ^ krlr;
+ krl = subl(1) ^ krrl; krr = subr(1) ^ krrr;
+ CAMELLIA_F(kll, klr,
+ CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R,
+ w0, w1, il, ir, t0, t1);
+ krl ^= w0; krr ^= w1;
+ CAMELLIA_F(krl, krr,
+ CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R,
+ kll, klr, il, ir, t0, t1);
+ kll ^= krll; klr ^= krlr;
+ CAMELLIA_F(kll, klr,
+ CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R,
+ krl, krr, il, ir, t0, t1);
+ krl ^= w0 ^ krrl; krr ^= w1 ^ krrr;
+ CAMELLIA_F(krl, krr,
+ CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R,
+ w0, w1, il, ir, t0, t1);
+ kll ^= w0; klr ^= w1;
+
+ /* generate KB */
+ krll ^= kll; krlr ^= klr;
+ krrl ^= krl; krrr ^= krr;
+ CAMELLIA_F(krll, krlr,
+ CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R,
+ w0, w1, il, ir, t0, t1);
+ krrl ^= w0; krrr ^= w1;
+ CAMELLIA_F(krrl, krrr,
+ CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R,
+ w0, w1, il, ir, t0, t1);
+ krll ^= w0; krlr ^= w1;
+
+ /* generate KA dependent subkeys */
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(6) = kll; subr(6) = klr;
+ subl(7) = krl; subr(7) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30);
+ subl(14) = kll; subr(14) = klr;
+ subl(15) = krl; subr(15) = krr;
+ subl(24) = klr; subr(24) = krl;
+ subl(25) = krr; subr(25) = kll;
+ CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 49);
+ subl(28) = kll; subr(28) = klr;
+ subl(29) = krl; subr(29) = krr;
+
+ /* generate KB dependent subkeys */
+ subl(2) = krll; subr(2) = krlr;
+ subl(3) = krrl; subr(3) = krrr;
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30);
+ subl(10) = krll; subr(10) = krlr;
+ subl(11) = krrl; subr(11) = krrr;
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30);
+ subl(20) = krll; subr(20) = krlr;
+ subl(21) = krrl; subr(21) = krrr;
+ CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 51);
+ subl(32) = krll; subr(32) = krlr;
+ subl(33) = krrl; subr(33) = krrr;
+
+ /* absorb kw2 to other subkeys */
+ subl(3) ^= subl(1); subr(3) ^= subr(1);
+ subl(5) ^= subl(1); subr(5) ^= subr(1);
+ subl(7) ^= subl(1); subr(7) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(9);
+ dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(11) ^= subl(1); subr(11) ^= subr(1);
+ subl(13) ^= subl(1); subr(13) ^= subr(1);
+ subl(15) ^= subl(1); subr(15) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(17);
+ dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(19) ^= subl(1); subr(19) ^= subr(1);
+ subl(21) ^= subl(1); subr(21) ^= subr(1);
+ subl(23) ^= subl(1); subr(23) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(25);
+ dw = subl(1) & subl(25), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(27) ^= subl(1); subr(27) ^= subr(1);
+ subl(29) ^= subl(1); subr(29) ^= subr(1);
+ subl(31) ^= subl(1); subr(31) ^= subr(1);
+ subl(32) ^= subl(1); subr(32) ^= subr(1);
+
+ /* absorb kw4 to other subkeys */
+ kw4l = subl(33); kw4r = subr(33);
+ subl(30) ^= kw4l; subr(30) ^= kw4r;
+ subl(28) ^= kw4l; subr(28) ^= kw4r;
+ subl(26) ^= kw4l; subr(26) ^= kw4r;
+ kw4l ^= kw4r & ~subr(24);
+ dw = kw4l & subl(24), kw4r ^= CAMELLIA_RL1(dw);
+ subl(22) ^= kw4l; subr(22) ^= kw4r;
+ subl(20) ^= kw4l; subr(20) ^= kw4r;
+ subl(18) ^= kw4l; subr(18) ^= kw4r;
+ kw4l ^= kw4r & ~subr(16);
+ dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw);
+ subl(14) ^= kw4l; subr(14) ^= kw4r;
+ subl(12) ^= kw4l; subr(12) ^= kw4r;
+ subl(10) ^= kw4l; subr(10) ^= kw4r;
+ kw4l ^= kw4r & ~subr(8);
+ dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw);
+ subl(6) ^= kw4l; subr(6) ^= kw4r;
+ subl(4) ^= kw4l; subr(4) ^= kw4r;
+ subl(2) ^= kw4l; subr(2) ^= kw4r;
+ subl(0) ^= kw4l; subr(0) ^= kw4r;
+
+ /* key XOR is end of F-function */
+ CamelliaSubkeyL(0) = subl(0) ^ subl(2);
+ CamelliaSubkeyR(0) = subr(0) ^ subr(2);
+ CamelliaSubkeyL(2) = subl(3);
+ CamelliaSubkeyR(2) = subr(3);
+ CamelliaSubkeyL(3) = subl(2) ^ subl(4);
+ CamelliaSubkeyR(3) = subr(2) ^ subr(4);
+ CamelliaSubkeyL(4) = subl(3) ^ subl(5);
+ CamelliaSubkeyR(4) = subr(3) ^ subr(5);
+ CamelliaSubkeyL(5) = subl(4) ^ subl(6);
+ CamelliaSubkeyR(5) = subr(4) ^ subr(6);
+ CamelliaSubkeyL(6) = subl(5) ^ subl(7);
+ CamelliaSubkeyR(6) = subr(5) ^ subr(7);
+ tl = subl(10) ^ (subr(10) & ~subr(8));
+ dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(7) = subl(6) ^ tl;
+ CamelliaSubkeyR(7) = subr(6) ^ tr;
+ CamelliaSubkeyL(8) = subl(8);
+ CamelliaSubkeyR(8) = subr(8);
+ CamelliaSubkeyL(9) = subl(9);
+ CamelliaSubkeyR(9) = subr(9);
+ tl = subl(7) ^ (subr(7) & ~subr(9));
+ dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(10) = tl ^ subl(11);
+ CamelliaSubkeyR(10) = tr ^ subr(11);
+ CamelliaSubkeyL(11) = subl(10) ^ subl(12);
+ CamelliaSubkeyR(11) = subr(10) ^ subr(12);
+ CamelliaSubkeyL(12) = subl(11) ^ subl(13);
+ CamelliaSubkeyR(12) = subr(11) ^ subr(13);
+ CamelliaSubkeyL(13) = subl(12) ^ subl(14);
+ CamelliaSubkeyR(13) = subr(12) ^ subr(14);
+ CamelliaSubkeyL(14) = subl(13) ^ subl(15);
+ CamelliaSubkeyR(14) = subr(13) ^ subr(15);
+ tl = subl(18) ^ (subr(18) & ~subr(16));
+ dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(15) = subl(14) ^ tl;
+ CamelliaSubkeyR(15) = subr(14) ^ tr;
+ CamelliaSubkeyL(16) = subl(16);
+ CamelliaSubkeyR(16) = subr(16);
+ CamelliaSubkeyL(17) = subl(17);
+ CamelliaSubkeyR(17) = subr(17);
+ tl = subl(15) ^ (subr(15) & ~subr(17));
+ dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(18) = tl ^ subl(19);
+ CamelliaSubkeyR(18) = tr ^ subr(19);
+ CamelliaSubkeyL(19) = subl(18) ^ subl(20);
+ CamelliaSubkeyR(19) = subr(18) ^ subr(20);
+ CamelliaSubkeyL(20) = subl(19) ^ subl(21);
+ CamelliaSubkeyR(20) = subr(19) ^ subr(21);
+ CamelliaSubkeyL(21) = subl(20) ^ subl(22);
+ CamelliaSubkeyR(21) = subr(20) ^ subr(22);
+ CamelliaSubkeyL(22) = subl(21) ^ subl(23);
+ CamelliaSubkeyR(22) = subr(21) ^ subr(23);
+ tl = subl(26) ^ (subr(26) & ~subr(24));
+ dw = tl & subl(24), tr = subr(26) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(23) = subl(22) ^ tl;
+ CamelliaSubkeyR(23) = subr(22) ^ tr;
+ CamelliaSubkeyL(24) = subl(24);
+ CamelliaSubkeyR(24) = subr(24);
+ CamelliaSubkeyL(25) = subl(25);
+ CamelliaSubkeyR(25) = subr(25);
+ tl = subl(23) ^ (subr(23) & ~subr(25));
+ dw = tl & subl(25), tr = subr(23) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(26) = tl ^ subl(27);
+ CamelliaSubkeyR(26) = tr ^ subr(27);
+ CamelliaSubkeyL(27) = subl(26) ^ subl(28);
+ CamelliaSubkeyR(27) = subr(26) ^ subr(28);
+ CamelliaSubkeyL(28) = subl(27) ^ subl(29);
+ CamelliaSubkeyR(28) = subr(27) ^ subr(29);
+ CamelliaSubkeyL(29) = subl(28) ^ subl(30);
+ CamelliaSubkeyR(29) = subr(28) ^ subr(30);
+ CamelliaSubkeyL(30) = subl(29) ^ subl(31);
+ CamelliaSubkeyR(30) = subr(29) ^ subr(31);
+ CamelliaSubkeyL(31) = subl(30);
+ CamelliaSubkeyR(31) = subr(30);
+ CamelliaSubkeyL(32) = subl(32) ^ subl(31);
+ CamelliaSubkeyR(32) = subr(32) ^ subr(31);
+
+ return;
+}
+
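+/* For 192-bit keys the Camellia key schedule (RFC 3713) defines the right
+ * half of the 256-bit working key as the bitwise complement of the key's
+ * last 64 bits, so this routine just builds that 32-byte key and reuses
+ * the 256-bit setup. */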
+void camellia_setup192(const unsigned char *key, u32 *subkey)
+{
+ unsigned char kk[32];
+ u32 krll, krlr, krrl, krrr;
+
+ memcpy(kk, key, 24);
+ memcpy((unsigned char *)&krll, key+16,4);
+ memcpy((unsigned char *)&krlr, key+20,4);
+ krrl = ~krll;
+ krrr = ~krlr;
+ memcpy(kk+24, (unsigned char *)&krrl, 4);
+ memcpy(kk+28, (unsigned char *)&krrr, 4);
+ camellia_setup256(kk, subkey);
+ return;
+}
+
+
+#ifndef USE_ARM_ASM
+/**
+ * Camellia encryption/decryption routines.
+ *
+ * "io" must be 4-byte aligned, big-endian data.
+ */
+void camellia_encrypt128(const u32 *subkey, u32 *blocks)
+{
+ u32 il, ir, t0, t1;
+ u32 io[4];
+
+ io[0] = blocks[0];
+ io[1] = blocks[1];
+ io[2] = blocks[2];
+ io[3] = blocks[3];
+
+ /* pre whitening but absorb kw2 */
+ io[0] ^= CamelliaSubkeyL(0);
+ io[1] ^= CamelliaSubkeyR(0);
+ /* main iteration */
+
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(2),CamelliaSubkeyR(2),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(3),CamelliaSubkeyR(3),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(4),CamelliaSubkeyR(4),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(5),CamelliaSubkeyR(5),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(6),CamelliaSubkeyR(6),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(7),CamelliaSubkeyR(7),
+ io[0],io[1],il,ir,t0,t1);
+
+ CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+ CamelliaSubkeyL(8),CamelliaSubkeyR(8),
+ CamelliaSubkeyL(9),CamelliaSubkeyR(9),
+ t0,t1,il,ir);
+
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(10),CamelliaSubkeyR(10),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(11),CamelliaSubkeyR(11),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(12),CamelliaSubkeyR(12),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(13),CamelliaSubkeyR(13),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(14),CamelliaSubkeyR(14),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(15),CamelliaSubkeyR(15),
+ io[0],io[1],il,ir,t0,t1);
+
+ CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+ CamelliaSubkeyL(16),CamelliaSubkeyR(16),
+ CamelliaSubkeyL(17),CamelliaSubkeyR(17),
+ t0,t1,il,ir);
+
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(18),CamelliaSubkeyR(18),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(19),CamelliaSubkeyR(19),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(20),CamelliaSubkeyR(20),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(21),CamelliaSubkeyR(21),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(22),CamelliaSubkeyR(22),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(23),CamelliaSubkeyR(23),
+ io[0],io[1],il,ir,t0,t1);
+
+ /* post whitening but kw4 */
+ io[2] ^= CamelliaSubkeyL(24);
+ io[3] ^= CamelliaSubkeyR(24);
+
+ t0 = io[0];
+ t1 = io[1];
+ io[0] = io[2];
+ io[1] = io[3];
+ io[2] = t0;
+ io[3] = t1;
+
+ blocks[0] = io[0];
+ blocks[1] = io[1];
+ blocks[2] = io[2];
+ blocks[3] = io[3];
+
+ return;
+}
+
+void camellia_decrypt128(const u32 *subkey, u32 *blocks)
+{
+ u32 il, ir, t0, t1; /* temporary variables */
+ u32 io[4];
+
+ io[0] = blocks[0];
+ io[1] = blocks[1];
+ io[2] = blocks[2];
+ io[3] = blocks[3];
+
+ /* pre whitening but absorb kw2 */
+ io[0] ^= CamelliaSubkeyL(24);
+ io[1] ^= CamelliaSubkeyR(24);
+
+ /* main iteration */
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(23),CamelliaSubkeyR(23),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(22),CamelliaSubkeyR(22),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(21),CamelliaSubkeyR(21),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(20),CamelliaSubkeyR(20),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(19),CamelliaSubkeyR(19),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(18),CamelliaSubkeyR(18),
+ io[0],io[1],il,ir,t0,t1);
+
+ CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+ CamelliaSubkeyL(17),CamelliaSubkeyR(17),
+ CamelliaSubkeyL(16),CamelliaSubkeyR(16),
+ t0,t1,il,ir);
+
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(15),CamelliaSubkeyR(15),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(14),CamelliaSubkeyR(14),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(13),CamelliaSubkeyR(13),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(12),CamelliaSubkeyR(12),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(11),CamelliaSubkeyR(11),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(10),CamelliaSubkeyR(10),
+ io[0],io[1],il,ir,t0,t1);
+
+ CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+ CamelliaSubkeyL(9),CamelliaSubkeyR(9),
+ CamelliaSubkeyL(8),CamelliaSubkeyR(8),
+ t0,t1,il,ir);
+
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(7),CamelliaSubkeyR(7),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(6),CamelliaSubkeyR(6),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(5),CamelliaSubkeyR(5),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(4),CamelliaSubkeyR(4),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(3),CamelliaSubkeyR(3),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(2),CamelliaSubkeyR(2),
+ io[0],io[1],il,ir,t0,t1);
+
+ /* post whitening but kw4 */
+ io[2] ^= CamelliaSubkeyL(0);
+ io[3] ^= CamelliaSubkeyR(0);
+
+ t0 = io[0];
+ t1 = io[1];
+ io[0] = io[2];
+ io[1] = io[3];
+ io[2] = t0;
+ io[3] = t1;
+
+ blocks[0] = io[0];
+ blocks[1] = io[1];
+ blocks[2] = io[2];
+ blocks[3] = io[3];
+
+ return;
+}
+
+/**
+ * Stuff for 192- and 256-bit encryption/decryption
+ */
+void camellia_encrypt256(const u32 *subkey, u32 *blocks)
+{
+ u32 il, ir, t0, t1; /* temporary variables */
+ u32 io[4];
+
+ io[0] = blocks[0];
+ io[1] = blocks[1];
+ io[2] = blocks[2];
+ io[3] = blocks[3];
+
+ /* pre whitening but absorb kw2 */
+ io[0] ^= CamelliaSubkeyL(0);
+ io[1] ^= CamelliaSubkeyR(0);
+
+ /* main iteration */
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(2),CamelliaSubkeyR(2),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(3),CamelliaSubkeyR(3),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(4),CamelliaSubkeyR(4),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(5),CamelliaSubkeyR(5),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(6),CamelliaSubkeyR(6),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(7),CamelliaSubkeyR(7),
+ io[0],io[1],il,ir,t0,t1);
+
+ CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+ CamelliaSubkeyL(8),CamelliaSubkeyR(8),
+ CamelliaSubkeyL(9),CamelliaSubkeyR(9),
+ t0,t1,il,ir);
+
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(10),CamelliaSubkeyR(10),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(11),CamelliaSubkeyR(11),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(12),CamelliaSubkeyR(12),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(13),CamelliaSubkeyR(13),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(14),CamelliaSubkeyR(14),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(15),CamelliaSubkeyR(15),
+ io[0],io[1],il,ir,t0,t1);
+
+ CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+ CamelliaSubkeyL(16),CamelliaSubkeyR(16),
+ CamelliaSubkeyL(17),CamelliaSubkeyR(17),
+ t0,t1,il,ir);
+
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(18),CamelliaSubkeyR(18),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(19),CamelliaSubkeyR(19),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(20),CamelliaSubkeyR(20),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(21),CamelliaSubkeyR(21),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(22),CamelliaSubkeyR(22),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(23),CamelliaSubkeyR(23),
+ io[0],io[1],il,ir,t0,t1);
+
+ CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+ CamelliaSubkeyL(24),CamelliaSubkeyR(24),
+ CamelliaSubkeyL(25),CamelliaSubkeyR(25),
+ t0,t1,il,ir);
+
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(26),CamelliaSubkeyR(26),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(27),CamelliaSubkeyR(27),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(28),CamelliaSubkeyR(28),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(29),CamelliaSubkeyR(29),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(30),CamelliaSubkeyR(30),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(31),CamelliaSubkeyR(31),
+ io[0],io[1],il,ir,t0,t1);
+
+ /* post whitening but kw4 */
+ io[2] ^= CamelliaSubkeyL(32);
+ io[3] ^= CamelliaSubkeyR(32);
+
+ t0 = io[0];
+ t1 = io[1];
+ io[0] = io[2];
+ io[1] = io[3];
+ io[2] = t0;
+ io[3] = t1;
+
+ blocks[0] = io[0];
+ blocks[1] = io[1];
+ blocks[2] = io[2];
+ blocks[3] = io[3];
+
+ return;
+}
+
+void camellia_decrypt256(const u32 *subkey, u32 *blocks)
+{
+ u32 il, ir, t0, t1; /* temporary variables */
+ u32 io[4];
+
+ io[0] = blocks[0];
+ io[1] = blocks[1];
+ io[2] = blocks[2];
+ io[3] = blocks[3];
+
+ /* pre whitening but absorb kw2 */
+ io[0] ^= CamelliaSubkeyL(32);
+ io[1] ^= CamelliaSubkeyR(32);
+
+ /* main iteration */
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(31),CamelliaSubkeyR(31),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(30),CamelliaSubkeyR(30),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(29),CamelliaSubkeyR(29),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(28),CamelliaSubkeyR(28),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(27),CamelliaSubkeyR(27),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(26),CamelliaSubkeyR(26),
+ io[0],io[1],il,ir,t0,t1);
+
+ CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+ CamelliaSubkeyL(25),CamelliaSubkeyR(25),
+ CamelliaSubkeyL(24),CamelliaSubkeyR(24),
+ t0,t1,il,ir);
+
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(23),CamelliaSubkeyR(23),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(22),CamelliaSubkeyR(22),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(21),CamelliaSubkeyR(21),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(20),CamelliaSubkeyR(20),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(19),CamelliaSubkeyR(19),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(18),CamelliaSubkeyR(18),
+ io[0],io[1],il,ir,t0,t1);
+
+ CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+ CamelliaSubkeyL(17),CamelliaSubkeyR(17),
+ CamelliaSubkeyL(16),CamelliaSubkeyR(16),
+ t0,t1,il,ir);
+
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(15),CamelliaSubkeyR(15),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(14),CamelliaSubkeyR(14),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(13),CamelliaSubkeyR(13),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(12),CamelliaSubkeyR(12),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(11),CamelliaSubkeyR(11),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(10),CamelliaSubkeyR(10),
+ io[0],io[1],il,ir,t0,t1);
+
+ CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+ CamelliaSubkeyL(9),CamelliaSubkeyR(9),
+ CamelliaSubkeyL(8),CamelliaSubkeyR(8),
+ t0,t1,il,ir);
+
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(7),CamelliaSubkeyR(7),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(6),CamelliaSubkeyR(6),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(5),CamelliaSubkeyR(5),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(4),CamelliaSubkeyR(4),
+ io[0],io[1],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[0],io[1],
+ CamelliaSubkeyL(3),CamelliaSubkeyR(3),
+ io[2],io[3],il,ir,t0,t1);
+ CAMELLIA_ROUNDSM(io[2],io[3],
+ CamelliaSubkeyL(2),CamelliaSubkeyR(2),
+ io[0],io[1],il,ir,t0,t1);
+
+ /* post whitening but kw4 */
+ io[2] ^= CamelliaSubkeyL(0);
+ io[3] ^= CamelliaSubkeyR(0);
+
+ t0 = io[0];
+ t1 = io[1];
+ io[0] = io[2];
+ io[1] = io[3];
+ io[2] = t0;
+ io[3] = t1;
+
+ blocks[0] = io[0];
+ blocks[1] = io[1];
+ blocks[2] = io[2];
+ blocks[3] = io[3];
+
+ return;
+}
+#endif /*!USE_ARM_ASM*/
+
+
+/***
+ *
+ * API for compatibility
+ */
+
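+/* Example use of this compatibility API (a minimal sketch; the buffer
+ * names are illustrative only):
+ *
+ * KEY_TABLE_TYPE keyTable;
+ * unsigned char key[16], pt[CAMELLIA_BLOCK_SIZE], ct[CAMELLIA_BLOCK_SIZE];
+ *
+ * Camellia_Ekeygen(128, key, keyTable);
+ * Camellia_EncryptBlock(128, pt, keyTable, ct);
+ * Camellia_DecryptBlock(128, ct, keyTable, pt);
+ */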
+void Camellia_Ekeygen(const int keyBitLength,
+ const unsigned char *rawKey,
+ KEY_TABLE_TYPE keyTable)
+{
+ switch(keyBitLength) {
+ case 128:
+ camellia_setup128(rawKey, keyTable);
+ break;
+ case 192:
+ camellia_setup192(rawKey, keyTable);
+ break;
+ case 256:
+ camellia_setup256(rawKey, keyTable);
+ break;
+ default:
+ break;
+ }
+}
+
+
+#ifndef USE_ARM_ASM
+void Camellia_EncryptBlock(const int keyBitLength,
+ const unsigned char *plaintext,
+ const KEY_TABLE_TYPE keyTable,
+ unsigned char *ciphertext)
+{
+ u32 tmp[4];
+
+ tmp[0] = GETU32(plaintext);
+ tmp[1] = GETU32(plaintext + 4);
+ tmp[2] = GETU32(plaintext + 8);
+ tmp[3] = GETU32(plaintext + 12);
+
+ switch (keyBitLength) {
+ case 128:
+ camellia_encrypt128(keyTable, tmp);
+ break;
+ case 192:
+ /* fall through */
+ case 256:
+ camellia_encrypt256(keyTable, tmp);
+ break;
+ default:
+ break;
+ }
+
+ PUTU32(ciphertext, tmp[0]);
+ PUTU32(ciphertext + 4, tmp[1]);
+ PUTU32(ciphertext + 8, tmp[2]);
+ PUTU32(ciphertext + 12, tmp[3]);
+}
+
+void Camellia_DecryptBlock(const int keyBitLength,
+ const unsigned char *ciphertext,
+ const KEY_TABLE_TYPE keyTable,
+ unsigned char *plaintext)
+{
+ u32 tmp[4];
+
+ tmp[0] = GETU32(ciphertext);
+ tmp[1] = GETU32(ciphertext + 4);
+ tmp[2] = GETU32(ciphertext + 8);
+ tmp[3] = GETU32(ciphertext + 12);
+
+ switch (keyBitLength) {
+ case 128:
+ camellia_decrypt128(keyTable, tmp);
+ break;
+ case 192:
+ /* fall through */
+ case 256:
+ camellia_decrypt256(keyTable, tmp);
+ break;
+ default:
+ break;
+ }
+ PUTU32(plaintext, tmp[0]);
+ PUTU32(plaintext + 4, tmp[1]);
+ PUTU32(plaintext + 8, tmp[2]);
+ PUTU32(plaintext + 12, tmp[3]);
+}
+#endif /*!USE_ARM_ASM*/
diff --git a/comm/third_party/libgcrypt/cipher/camellia.h b/comm/third_party/libgcrypt/cipher/camellia.h
new file mode 100644
index 0000000000..d7a1e6f4a0
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/camellia.h
@@ -0,0 +1,95 @@
+/* camellia.h ver 1.2.0
+ *
+ * Copyright (C) 2006,2007
+ * NTT (Nippon Telegraph and Telephone Corporation).
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef HEADER_CAMELLIA_H
+#define HEADER_CAMELLIA_H
+
+/* To use Camellia with libraries it is often useful to keep the name
+ * space of the library clean. The following macro is thus useful:
+ *
+ * #define CAMELLIA_EXT_SYM_PREFIX foo_
+ *
+ * This prefixes all external symbols with "foo_".
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+# undef USE_ARM_ASM
+# if defined(__ARMEL__)
+# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+# define USE_ARM_ASM 1
+# endif
+# endif
+# if defined(__AARCH64EL__)
+# ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+# define USE_ARM_ASM 1
+# endif
+# endif
+#endif
+#ifdef CAMELLIA_EXT_SYM_PREFIX
+#define CAMELLIA_PREFIX1(x,y) x ## y
+#define CAMELLIA_PREFIX2(x,y) CAMELLIA_PREFIX1(x,y)
+#define CAMELLIA_PREFIX(x) CAMELLIA_PREFIX2(CAMELLIA_EXT_SYM_PREFIX,x)
+#define Camellia_Ekeygen CAMELLIA_PREFIX(Camellia_Ekeygen)
+#define Camellia_EncryptBlock CAMELLIA_PREFIX(Camellia_EncryptBlock)
+#define Camellia_DecryptBlock CAMELLIA_PREFIX(Camellia_DecryptBlock)
+#define camellia_decrypt128 CAMELLIA_PREFIX(camellia_decrypt128)
+#define camellia_decrypt256 CAMELLIA_PREFIX(camellia_decrypt256)
+#define camellia_encrypt128 CAMELLIA_PREFIX(camellia_encrypt128)
+#define camellia_encrypt256 CAMELLIA_PREFIX(camellia_encrypt256)
+#define camellia_setup128 CAMELLIA_PREFIX(camellia_setup128)
+#define camellia_setup192 CAMELLIA_PREFIX(camellia_setup192)
+#define camellia_setup256 CAMELLIA_PREFIX(camellia_setup256)
+#endif /*CAMELLIA_EXT_SYM_PREFIX*/
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CAMELLIA_BLOCK_SIZE 16
+#define CAMELLIA_TABLE_BYTE_LEN 272
+#define CAMELLIA_TABLE_WORD_LEN (CAMELLIA_TABLE_BYTE_LEN / 4)
+
+typedef unsigned int KEY_TABLE_TYPE[CAMELLIA_TABLE_WORD_LEN];
+
+
+void Camellia_Ekeygen(const int keyBitLength,
+ const unsigned char *rawKey,
+ KEY_TABLE_TYPE keyTable);
+
+#ifndef USE_ARM_ASM
+void Camellia_EncryptBlock(const int keyBitLength,
+ const unsigned char *plaintext,
+ const KEY_TABLE_TYPE keyTable,
+ unsigned char *cipherText);
+
+void Camellia_DecryptBlock(const int keyBitLength,
+ const unsigned char *cipherText,
+ const KEY_TABLE_TYPE keyTable,
+ unsigned char *plaintext);
+#endif /*!USE_ARM_ASM*/
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HEADER_CAMELLIA_H */
diff --git a/comm/third_party/libgcrypt/cipher/cast5-amd64.S b/comm/third_party/libgcrypt/cipher/cast5-amd64.S
new file mode 100644
index 0000000000..82f678901d
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cast5-amd64.S
@@ -0,0 +1,663 @@
+/* cast5-amd64.S - AMD64 assembly implementation of CAST5 cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_CAST5)
+
+#include "asm-common-amd64.h"
+
+.text
+
+.extern _gcry_cast5_s1to4;
+
+#define s1 0
+#define s2 (s1 + (4 * 256))
+#define s3 (s2 + (4 * 256))
+#define s4 (s3 + (4 * 256))
+
+/* structure of CAST5_context: */
+#define Km 0
+#define Kr (Km + (16 * 4))
+
+/* register macros */
+#define CTX %rdi
+#define RIO %rsi
+#define RTAB %r8
+
+#define RLR0 %r9
+#define RLR1 %r10
+#define RLR2 %r11
+#define RLR3 %r12
+
+#define RLR0d %r9d
+#define RLR1d %r10d
+#define RLR2d %r11d
+#define RLR3d %r12d
+
+#define RX0 %rax
+#define RX1 %rbx
+#define RX2 %rdx
+
+#define RX0d %eax
+#define RX1d %ebx
+#define RX2d %edx
+
+#define RX0bl %al
+#define RX1bl %bl
+#define RX2bl %dl
+
+#define RX0bh %ah
+#define RX1bh %bh
+#define RX2bh %dh
+
+#define RKR %rcx
+#define RKRd %ecx
+#define RKRbl %cl
+
+#define RT0 %rbp
+#define RT1 %rsi
+
+#define RT0d %ebp
+#define RT1d %esi
+
+#define RKM0d %r13d
+#define RKM1d %r14d
+
+/***********************************************************************
+ * 1-way cast5
+ ***********************************************************************/
+#define dummy(x)
+
+#define shr_kr(none) \
+ shrq $8, RKR;
+
+#define F(km, load_next_kr, op0, op1, op2, op3) \
+ op0 ## l RLR0d, km ## d; \
+ roll RKRbl, km ## d; \
+ rorq $32, RLR0; \
+ movzbl km ## bh, RT0d; \
+ movzbl km ## bl, RT1d; \
+ roll $16, km ## d; \
+ movl s1(RTAB,RT0,4), RT0d; \
+ op1 ## l s2(RTAB,RT1,4), RT0d; \
+ load_next_kr(kr_next); \
+ movzbl km ## bh, RT1d; \
+ movzbl km ## bl, km ## d; \
+ op2 ## l s3(RTAB,RT1,4), RT0d; \
+ op3 ## l s4(RTAB,km,4), RT0d; \
+ xorq RT0, RLR0;
+
+#define F1(km, load_next_kr) \
+ F(##km, load_next_kr, add, xor, sub, add)
+#define F2(km, load_next_kr) \
+ F(##km, load_next_kr, xor, sub, add, xor)
+#define F3(km, load_next_kr) \
+ F(##km, load_next_kr, sub, add, xor, sub)
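+/* F1/F2/F3 correspond to the three CAST5 round function types of RFC 2144:
+ * type 1: I = ((Km + D) <<< Kr), f = ((S1[Ia] ^ S2[Ib]) - S3[Ic]) + S4[Id]
+ * type 2: I = ((Km ^ D) <<< Kr), f = ((S1[Ia] - S2[Ib]) + S3[Ic]) ^ S4[Id]
+ * type 3: I = ((Km - D) <<< Kr), f = ((S1[Ia] + S2[Ib]) ^ S3[Ic]) - S4[Id]
+ */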
+
+#define get_round_km(n, km) \
+ movl Km+4*(n)(CTX), km;
+
+#define get_round_kr_enc(n) \
+ movq $0x1010101010101010, RKR; \
+ \
+ /* merge rorl rk and rorl $16 */ \
+ xorq Kr+(n)(CTX), RKR;
+
+#define get_round_kr_dec(n) \
+ movq $0x1010101010101010, RKR; \
+ \
+ /* merge rorl rk and rorl $16 */ \
+ xorq Kr+(n - 7)(CTX), RKR; \
+ bswapq RKR;
+
+#define round_enc(n, FA, FB, fn1, fn2) \
+ get_round_km(n + 1, RX2d); \
+ FA(RX0, fn1); \
+ get_round_km(n + 2, RX0d); \
+ FB(RX2, fn2);
+
+#define round_enc_last(n, FXA, FXB) \
+ get_round_km(n + 1, RX2d); \
+ \
+ FXA(RX0, shr_kr); \
+ FXB(RX2, dummy);
+
+#define round_enc_1(n, FA, FB) \
+ round_enc(n, FA, FB, shr_kr, shr_kr)
+
+#define round_enc_2(n, FA, FB) \
+ round_enc(n, FA, FB, shr_kr, dummy)
+
+#define round_dec(n, FA, FB, fn1, fn2) \
+ get_round_km(n - 1, RX2d); \
+ FA(RX0, fn1); \
+ get_round_km(n - 2, RX0d); \
+ FB(RX2, fn2);
+
+#define round_dec_last(n, FXA, FXB) \
+ get_round_km(n - 1, RX2d); \
+ FXA(RX0, shr_kr); \
+ FXB(RX2, dummy);
+
+#define round_dec_1(n, FA, FB) \
+ round_dec(n, FA, FB, shr_kr, shr_kr)
+
+#define round_dec_2(n, FA, FB) \
+ round_dec(n, FA, FB, shr_kr, dummy)
+
+#define read_block() \
+ movq (RIO), RLR0; \
+ bswapq RLR0;
+
+#define write_block() \
+ bswapq RLR0; \
+ rorq $32, RLR0; \
+ movq RLR0, (RIO);
+
+.align 8
+.globl _gcry_cast5_amd64_encrypt_block
+ELF(.type _gcry_cast5_amd64_encrypt_block,@function;)
+
+_gcry_cast5_amd64_encrypt_block:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+
+ movq %rsi, %r10;
+
+ GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+ movq %rdx, RIO;
+ read_block();
+
+ get_round_km(0, RX0d);
+ get_round_kr_enc(0);
+ round_enc_1(0, F1, F2);
+ round_enc_1(2, F3, F1);
+ round_enc_1(4, F2, F3);
+ round_enc_2(6, F1, F2);
+ get_round_kr_enc(8);
+ round_enc_1(8, F3, F1);
+ round_enc_1(10, F2, F3);
+ round_enc_1(12, F1, F2);
+ round_enc_last(14, F3, F1);
+
+ movq %r10, RIO;
+ write_block();
+
+ popq %rbx;
+ CFI_POP(%rbx);
+ popq %rbp;
+ CFI_POP(%rbp);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;)
+
+.align 8
+.globl _gcry_cast5_amd64_decrypt_block
+ELF(.type _gcry_cast5_amd64_decrypt_block,@function;)
+
+_gcry_cast5_amd64_decrypt_block:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+
+ movq %rsi, %r10;
+
+ GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+ movq %rdx, RIO;
+ read_block();
+
+ get_round_km(15, RX0d);
+ get_round_kr_dec(15);
+ round_dec_1(15, F1, F3);
+ round_dec_1(13, F2, F1);
+ round_dec_1(11, F3, F2);
+ round_dec_2(9, F1, F3);
+ get_round_kr_dec(7);
+ round_dec_1(7, F2, F1);
+ round_dec_1(5, F3, F2);
+ round_dec_1(3, F1, F3);
+ round_dec_last(1, F2, F1);
+
+ movq %r10, RIO;
+ write_block();
+
+ popq %rbx;
+ CFI_POP(%rbx);
+ popq %rbp;
+ CFI_POP(%rbp);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;)
+
+/**********************************************************************
+ 4-way cast5, four blocks parallel
+ **********************************************************************/
+#define F_tail(rlr, rx, op1, op2, op3) \
+ movzbl rx ## bh, RT0d; \
+ movzbl rx ## bl, RT1d; \
+ roll $16, rx ## d; \
+ movl s1(RTAB,RT0,4), RT0d; \
+ op1 ## l s2(RTAB,RT1,4), RT0d; \
+ movzbl rx ## bh, RT1d; \
+ movzbl rx ## bl, rx ## d; \
+ op2 ## l s3(RTAB,RT1,4), RT0d; \
+ op3 ## l s4(RTAB,rx,4), RT0d; \
+ xorq RT0, rlr;
+
+#define F4(km, load_next_kr, op0, op1, op2, op3) \
+ movl km, RX0d; \
+ op0 ## l RLR0d, RX0d; \
+ roll RKRbl, RX0d; \
+ rorq $32, RLR0; \
+ \
+ movl km, RX1d; \
+ op0 ## l RLR1d, RX1d; \
+ roll RKRbl, RX1d; \
+ rorq $32, RLR1; \
+ \
+ movl km, RX2d; \
+ op0 ## l RLR2d, RX2d; \
+ roll RKRbl, RX2d; \
+ rorq $32, RLR2; \
+ \
+ F_tail(RLR0, RX0, op1, op2, op3); \
+ F_tail(RLR1, RX1, op1, op2, op3); \
+ F_tail(RLR2, RX2, op1, op2, op3); \
+ \
+ movl km, RX0d; \
+ op0 ## l RLR3d, RX0d; \
+ roll RKRbl, RX0d; \
+ load_next_kr(); \
+ rorq $32, RLR3; \
+ \
+ F_tail(RLR3, RX0, op1, op2, op3);
+
+#define F4_1(km, load_next_kr) \
+ F4(km, load_next_kr, add, xor, sub, add)
+#define F4_2(km, load_next_kr) \
+ F4(km, load_next_kr, xor, sub, add, xor)
+#define F4_3(km, load_next_kr) \
+ F4(km, load_next_kr, sub, add, xor, sub)
+
+#define round_enc4(n, FA, FB, fn1, fn2) \
+ get_round_km(n + 1, RKM1d); \
+ FA(RKM0d, fn1); \
+ get_round_km(n + 2, RKM0d); \
+ FB(RKM1d, fn2);
+
+#define round_enc_last4(n, FXA, FXB) \
+ get_round_km(n + 1, RKM1d); \
+ FXA(RKM0d, shr_kr); \
+ FXB(RKM1d, dummy);
+
+#define round_enc4_1(n, FA, FB) \
+ round_enc4(n, FA, FB, shr_kr, shr_kr);
+
+#define round_enc4_2(n, FA, FB) \
+ round_enc4(n, FA, FB, shr_kr, dummy);
+
+#define round_dec4(n, FA, FB, fn1, fn2) \
+ get_round_km(n - 1, RKM1d); \
+ FA(RKM0d, fn1); \
+ get_round_km(n - 2, RKM0d); \
+ FB(RKM1d, fn2);
+
+#define round_dec_last4(n, FXA, FXB) \
+ get_round_km(n - 1, RKM1d); \
+ FXA(RKM0d, shr_kr); \
+ FXB(RKM1d, dummy);
+
+#define round_dec4_1(n, FA, FB) \
+ round_dec4(n, FA, FB, shr_kr, shr_kr);
+
+#define round_dec4_2(n, FA, FB) \
+ round_dec4(n, FA, FB, shr_kr, dummy);
+
+#define inbswap_block4(a, b, c, d) \
+ bswapq a; \
+ bswapq b; \
+ bswapq c; \
+ bswapq d;
+
+#define outbswap_block4(a, b, c, d) \
+ bswapq a; \
+ bswapq b; \
+ bswapq c; \
+ bswapq d; \
+ rorq $32, a; \
+ rorq $32, b; \
+ rorq $32, c; \
+ rorq $32, d;
+
+.align 8
+ELF(.type __cast5_enc_blk4,@function;)
+
+__cast5_enc_blk4:
+ /* input:
+ * %rdi: ctx, CTX
+ * RLR0,RLR1,RLR2,RLR3: four input plaintext blocks
+ * output:
+ * RLR0,RLR1,RLR2,RLR3: four output ciphertext blocks
+ */
+ CFI_STARTPROC();
+ GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+ get_round_km(0, RKM0d);
+ get_round_kr_enc(0);
+ round_enc4_1(0, F4_1, F4_2);
+ round_enc4_1(2, F4_3, F4_1);
+ round_enc4_1(4, F4_2, F4_3);
+ round_enc4_2(6, F4_1, F4_2);
+ get_round_kr_enc(8);
+ round_enc4_1(8, F4_3, F4_1);
+ round_enc4_1(10, F4_2, F4_3);
+ round_enc4_1(12, F4_1, F4_2);
+ round_enc_last4(14, F4_3, F4_1);
+
+ outbswap_block4(RLR0, RLR1, RLR2, RLR3);
+ ret;
+ CFI_ENDPROC();
+ELF(.size __cast5_enc_blk4,.-__cast5_enc_blk4;)
+
+.align 8
+ELF(.type __cast5_dec_blk4,@function;)
+
+__cast5_dec_blk4:
+ /* input:
+ * %rdi: ctx, CTX
+ * RLR0,RLR1,RLR2,RLR3: four input ciphertext blocks
+ * output:
+ * RLR0,RLR1,RLR2,RLR3: four output plaintext blocks
+ */
+ CFI_STARTPROC();
+ GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+ inbswap_block4(RLR0, RLR1, RLR2, RLR3);
+
+ get_round_km(15, RKM0d);
+ get_round_kr_dec(15);
+ round_dec4_1(15, F4_1, F4_3);
+ round_dec4_1(13, F4_2, F4_1);
+ round_dec4_1(11, F4_3, F4_2);
+ round_dec4_2(9, F4_1, F4_3);
+ get_round_kr_dec(7);
+ round_dec4_1(7, F4_2, F4_1);
+ round_dec4_1(5, F4_3, F4_2);
+ round_dec4_1(3, F4_1, F4_3);
+ round_dec_last4(1, F4_2, F4_1);
+
+ outbswap_block4(RLR0, RLR1, RLR2, RLR3);
+ ret;
+ CFI_ENDPROC();
+ELF(.size __cast5_dec_blk4,.-__cast5_dec_blk4;)
+
+.align 8
+.globl _gcry_cast5_amd64_ctr_enc
+ELF(.type _gcry_cast5_amd64_ctr_enc,@function;)
+_gcry_cast5_amd64_ctr_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (4 blocks)
+ * %rdx: src (4 blocks)
+ * %rcx: iv (big endian, 64bit)
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %r12;
+ CFI_PUSH(%r12);
+ pushq %r13;
+ CFI_PUSH(%r13);
+ pushq %r14;
+ CFI_PUSH(%r14);
+
+ pushq %rsi;
+ CFI_PUSH(%rsi);
+ pushq %rdx;
+ CFI_PUSH(%rdx);
+
+ /* load IV and byteswap */
+ movq (%rcx), RX0;
+ bswapq RX0;
+ movq RX0, RLR0;
+
+ /* construct IVs */
+ leaq 1(RX0), RLR1;
+ leaq 2(RX0), RLR2;
+ leaq 3(RX0), RLR3;
+ leaq 4(RX0), RX0;
+ bswapq RX0;
+
+ /* store new IV */
+ movq RX0, (%rcx);
+
+ call __cast5_enc_blk4;
+
+ popq %r14; /*src*/
+ CFI_POP_TMP_REG();
+ popq %r13; /*dst*/
+ CFI_POP_TMP_REG();
+
+ /* XOR key-stream with plaintext */
+ xorq 0 * 8(%r14), RLR0;
+ xorq 1 * 8(%r14), RLR1;
+ xorq 2 * 8(%r14), RLR2;
+ xorq 3 * 8(%r14), RLR3;
+ movq RLR0, 0 * 8(%r13);
+ movq RLR1, 1 * 8(%r13);
+ movq RLR2, 2 * 8(%r13);
+ movq RLR3, 3 * 8(%r13);
+
+ popq %r14;
+ CFI_POP(%r14);
+ popq %r13;
+ CFI_POP(%r13);
+ popq %r12;
+ CFI_POP(%r12);
+ popq %rbx;
+ CFI_POP(%rbx);
+ popq %rbp;
+ CFI_POP(%rbp);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;)
+
+.align 8
+.globl _gcry_cast5_amd64_cbc_dec
+ELF(.type _gcry_cast5_amd64_cbc_dec,@function;)
+_gcry_cast5_amd64_cbc_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (4 blocks)
+ * %rdx: src (4 blocks)
+ * %rcx: iv (64bit)
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %r12;
+ CFI_PUSH(%r12);
+ pushq %r13;
+ CFI_PUSH(%r13);
+ pushq %r14;
+ CFI_PUSH(%r14);
+
+ pushq %rcx;
+ CFI_PUSH(%rcx);
+ pushq %rsi;
+ CFI_PUSH(%rsi);
+ pushq %rdx;
+ CFI_PUSH(%rdx);
+
+ /* load input */
+ movq 0 * 8(%rdx), RLR0;
+ movq 1 * 8(%rdx), RLR1;
+ movq 2 * 8(%rdx), RLR2;
+ movq 3 * 8(%rdx), RLR3;
+
+ call __cast5_dec_blk4;
+
+ popq RX0; /*src*/
+ CFI_POP_TMP_REG();
+ popq RX1; /*dst*/
+ CFI_POP_TMP_REG();
+ popq RX2; /*iv*/
+ CFI_POP_TMP_REG();
+
+ movq 3 * 8(RX0), %r14;
+ xorq (RX2), RLR0;
+ xorq 0 * 8(RX0), RLR1;
+ xorq 1 * 8(RX0), RLR2;
+ xorq 2 * 8(RX0), RLR3;
+ movq %r14, (RX2); /* store new IV */
+
+ movq RLR0, 0 * 8(RX1);
+ movq RLR1, 1 * 8(RX1);
+ movq RLR2, 2 * 8(RX1);
+ movq RLR3, 3 * 8(RX1);
+
+ popq %r14;
+ CFI_POP(%r14);
+ popq %r13;
+ CFI_POP(%r13);
+ popq %r12;
+ CFI_POP(%r12);
+ popq %rbx;
+ CFI_POP(%rbx);
+ popq %rbp;
+ CFI_POP(%rbp);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;)
+
+.align 8
+.globl _gcry_cast5_amd64_cfb_dec
+ELF(.type _gcry_cast5_amd64_cfb_dec,@function;)
+_gcry_cast5_amd64_cfb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (4 blocks)
+ * %rdx: src (4 blocks)
+ * %rcx: iv (64bit)
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %r12;
+ CFI_PUSH(%r12);
+ pushq %r13;
+ CFI_PUSH(%r13);
+ pushq %r14;
+ CFI_PUSH(%r14);
+
+ pushq %rsi;
+ CFI_PUSH(%rsi);
+ pushq %rdx;
+ CFI_PUSH(%rdx);
+
+ /* Load input */
+ movq (%rcx), RLR0;
+ movq 0 * 8(%rdx), RLR1;
+ movq 1 * 8(%rdx), RLR2;
+ movq 2 * 8(%rdx), RLR3;
+
+ inbswap_block4(RLR0, RLR1, RLR2, RLR3);
+
+ /* Update IV */
+ movq 3 * 8(%rdx), %rdx;
+ movq %rdx, (%rcx);
+
+ call __cast5_enc_blk4;
+
+ popq %rdx; /*src*/
+ CFI_POP_TMP_REG();
+ popq %rcx; /*dst*/
+ CFI_POP_TMP_REG();
+
+ xorq 0 * 8(%rdx), RLR0;
+ xorq 1 * 8(%rdx), RLR1;
+ xorq 2 * 8(%rdx), RLR2;
+ xorq 3 * 8(%rdx), RLR3;
+ movq RLR0, 0 * 8(%rcx);
+ movq RLR1, 1 * 8(%rcx);
+ movq RLR2, 2 * 8(%rcx);
+ movq RLR3, 3 * 8(%rcx);
+
+ popq %r14;
+ CFI_POP(%r14);
+ popq %r13;
+ CFI_POP(%r13);
+ popq %r12;
+ CFI_POP(%r12);
+ popq %rbx;
+ CFI_POP(%rbx);
+ popq %rbp;
+ CFI_POP(%rbp);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;)
+
+#endif /*defined(USE_CAST5)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/cast5-arm.S b/comm/third_party/libgcrypt/cipher/cast5-arm.S
new file mode 100644
index 0000000000..76ddd2e335
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cast5-arm.S
@@ -0,0 +1,728 @@
+/* cast5-arm.S - ARM assembly implementation of CAST5 cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+.extern _gcry_cast5_s1to4;
+
+#ifdef __PIC__
+# define GET_DATA_POINTER(reg, name, rtmp) \
+ ldr reg, 1f; \
+ ldr rtmp, 2f; \
+ b 3f; \
+ 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+ 2: .word name(GOT); \
+ 3: add reg, pc, reg; \
+ ldr reg, [reg, rtmp];
+#else
+# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+/* structure of crypto context */
+#define Km 0
+#define Kr (Km + (16 * 4))
+#define Kr_arm_enc (Kr + (16))
+#define Kr_arm_dec (Kr_arm_enc + (16))
+
+/* register macros */
+#define CTX %r0
+#define Rs1 %r7
+#define Rs2 %r8
+#define Rs3 %r9
+#define Rs4 %r10
+#define RMASK %r11
+#define RKM %r1
+#define RKR %r2
+
+#define RL0 %r3
+#define RR0 %r4
+
+#define RL1 %r9
+#define RR1 %r10
+
+#define RT0 %lr
+#define RT1 %ip
+#define RT2 %r5
+#define RT3 %r6
+
+/* helper macros */
+#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \
+ ldrb rout, [rsrc, #((offs) + 0)]; \
+ ldrb rtmp, [rsrc, #((offs) + 1)]; \
+ orr rout, rout, rtmp, lsl #8; \
+ ldrb rtmp, [rsrc, #((offs) + 2)]; \
+ orr rout, rout, rtmp, lsl #16; \
+ ldrb rtmp, [rsrc, #((offs) + 3)]; \
+ orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \
+ mov rtmp0, rin, lsr #8; \
+ strb rin, [rdst, #((offs) + 0)]; \
+ mov rtmp1, rin, lsr #16; \
+ strb rtmp0, [rdst, #((offs) + 1)]; \
+ mov rtmp0, rin, lsr #24; \
+ strb rtmp1, [rdst, #((offs) + 2)]; \
+ strb rtmp0, [rdst, #((offs) + 3)];
+
+#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \
+ ldrb rout, [rsrc, #((offs) + 3)]; \
+ ldrb rtmp, [rsrc, #((offs) + 2)]; \
+ orr rout, rout, rtmp, lsl #8; \
+ ldrb rtmp, [rsrc, #((offs) + 1)]; \
+ orr rout, rout, rtmp, lsl #16; \
+ ldrb rtmp, [rsrc, #((offs) + 0)]; \
+ orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \
+ mov rtmp0, rin, lsr #8; \
+ strb rin, [rdst, #((offs) + 3)]; \
+ mov rtmp1, rin, lsr #16; \
+ strb rtmp0, [rdst, #((offs) + 2)]; \
+ mov rtmp0, rin, lsr #24; \
+ strb rtmp1, [rdst, #((offs) + 1)]; \
+ strb rtmp0, [rdst, #((offs) + 0)];
+
+#ifdef __ARMEL__
+ #define ldr_unaligned_host ldr_unaligned_le
+ #define str_unaligned_host str_unaligned_le
+
+ /* bswap on little-endian */
+#ifdef HAVE_ARM_ARCH_V6
+ #define host_to_be(reg, rtmp) \
+ rev reg, reg;
+ #define be_to_host(reg, rtmp) \
+ rev reg, reg;
+#else
+ #define host_to_be(reg, rtmp) \
+ eor rtmp, reg, reg, ror #16; \
+ mov rtmp, rtmp, lsr #8; \
+ bic rtmp, rtmp, #65280; \
+ eor reg, rtmp, reg, ror #8;
+ #define be_to_host(reg, rtmp) \
+ eor rtmp, reg, reg, ror #16; \
+ mov rtmp, rtmp, lsr #8; \
+ bic rtmp, rtmp, #65280; \
+ eor reg, rtmp, reg, ror #8;
+#endif
+#else
+ #define ldr_unaligned_host ldr_unaligned_be
+ #define str_unaligned_host str_unaligned_be
+
+ /* nop on big-endian */
+ #define host_to_be(reg, rtmp) /*_*/
+ #define be_to_host(reg, rtmp) /*_*/
+#endif
+
+#define host_to_host(x, y) /*_*/
+
+/**********************************************************************
+ 1-way cast5
+ **********************************************************************/
+
+#define dummy(n) /*_*/
+
+#define load_kr(n) \
+ ldr RKR, [CTX, #(Kr_arm_enc + (n))]; /* Kr[n] */
+
+#define load_dec_kr(n) \
+ ldr RKR, [CTX, #(Kr_arm_dec + (n) - 3)]; /* Kr[n] */
+
+#define load_km(n) \
+ ldr RKM, [CTX, #(Km + (n) * 4)]; /* Km[n] */
+
+#define shift_kr(dummy) \
+ mov RKR, RKR, lsr #8;
+
+#define F(n, rl, rr, op1, op2, op3, op4, dec, loadkm, shiftkr, loadkr) \
+ op1 RKM, rr; \
+ mov RKM, RKM, ror RKR; \
+ \
+ and RT0, RMASK, RKM, ror #(24); \
+ and RT1, RMASK, RKM, lsr #(16); \
+ and RT2, RMASK, RKM, lsr #(8); \
+ ldr RT0, [Rs1, RT0]; \
+ and RT3, RMASK, RKM; \
+ ldr RT1, [Rs2, RT1]; \
+ shiftkr(RKR); \
+ \
+ ldr RT2, [Rs3, RT2]; \
+ \
+ op2 RT0, RT1; \
+ ldr RT3, [Rs4, RT3]; \
+ op3 RT0, RT2; \
+ loadkm((n) + (1 - ((dec) * 2))); \
+ op4 RT0, RT3; \
+ loadkr((n) + (1 - ((dec) * 2))); \
+ eor rl, RT0;
+
+#define F1(n, rl, rr, dec, loadkm, shiftkr, loadkr) \
+ F(n, rl, rr, add, eor, sub, add, dec, loadkm, shiftkr, loadkr)
+#define F2(n, rl, rr, dec, loadkm, shiftkr, loadkr) \
+ F(n, rl, rr, eor, sub, add, eor, dec, loadkm, shiftkr, loadkr)
+#define F3(n, rl, rr, dec, loadkm, shiftkr, loadkr) \
+ F(n, rl, rr, sub, add, eor, sub, dec, loadkm, shiftkr, loadkr)
+
+#define enc_round(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \
+ Fx(n, rl, rr, 0, loadkm, shiftkr, loadkr)
+
+#define dec_round(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \
+ Fx(n, rl, rr, 1, loadkm, shiftkr, loadkr)
+
+#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \
+ ldr l0, [rin, #((offs) + 0)]; \
+ ldr r0, [rin, #((offs) + 4)]; \
+ convert(l0, rtmp); \
+ convert(r0, rtmp);
+
+#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \
+ convert(l0, rtmp); \
+ convert(r0, rtmp); \
+ str l0, [rout, #((offs) + 0)]; \
+ str r0, [rout, #((offs) + 4)];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+ /* unaligned word reads allowed */
+ #define read_block(rin, offs, l0, r0, rtmp0) \
+ read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0)
+
+ #define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \
+ write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0)
+
+ #define read_block_host(rin, offs, l0, r0, rtmp0) \
+ read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0)
+
+ #define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \
+ write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0)
+#else
+ /* need to handle unaligned reads by byte reads */
+ #define read_block(rin, offs, l0, r0, rtmp0) \
+ tst rin, #3; \
+ beq 1f; \
+ ldr_unaligned_be(l0, rin, (offs) + 0, rtmp0); \
+ ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \
+ b 2f; \
+ 1:;\
+ read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0); \
+ 2:;
+
+ #define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \
+ tst rout, #3; \
+ beq 1f; \
+ str_unaligned_be(l0, rout, (offs) + 0, rtmp0, rtmp1); \
+ str_unaligned_be(r0, rout, (offs) + 4, rtmp0, rtmp1); \
+ b 2f; \
+ 1:;\
+ write_block_aligned(rout, offs, l0, r0, be_to_host, rtmp0); \
+ 2:;
+
+ #define read_block_host(rin, offs, l0, r0, rtmp0) \
+ tst rin, #3; \
+ beq 1f; \
+ ldr_unaligned_host(l0, rin, (offs) + 0, rtmp0); \
+ ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \
+ b 2f; \
+ 1:;\
+ read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0); \
+ 2:;
+
+ #define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \
+ tst rout, #3; \
+ beq 1f; \
+ str_unaligned_host(l0, rout, (offs) + 0, rtmp0, rtmp1); \
+ str_unaligned_host(r0, rout, (offs) + 4, rtmp0, rtmp1); \
+ b 2f; \
+ 1:;\
+ write_block_aligned(rout, offs, l0, r0, host_to_host, rtmp0); \
+ 2:;
+#endif
+
+.align 3
+.globl _gcry_cast5_arm_encrypt_block
+.type _gcry_cast5_arm_encrypt_block,%function;
+
+_gcry_cast5_arm_encrypt_block:
+ /* input:
+ * %r0: CTX
+ * %r1: dst
+ * %r2: src
+ */
+ push {%r1, %r4-%r11, %ip, %lr};
+
+ GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2);
+ mov RMASK, #(0xff << 2);
+ add Rs2, Rs1, #(0x100*4);
+ add Rs3, Rs1, #(0x100*4*2);
+ add Rs4, Rs1, #(0x100*4*3);
+
+ read_block(%r2, 0, RL0, RR0, RT0);
+
+ load_km(0);
+ load_kr(0);
+ enc_round(0, F1, RL0, RR0, load_km, shift_kr, dummy);
+ enc_round(1, F2, RR0, RL0, load_km, shift_kr, dummy);
+ enc_round(2, F3, RL0, RR0, load_km, shift_kr, dummy);
+ enc_round(3, F1, RR0, RL0, load_km, dummy, load_kr);
+ enc_round(4, F2, RL0, RR0, load_km, shift_kr, dummy);
+ enc_round(5, F3, RR0, RL0, load_km, shift_kr, dummy);
+ enc_round(6, F1, RL0, RR0, load_km, shift_kr, dummy);
+ enc_round(7, F2, RR0, RL0, load_km, dummy, load_kr);
+ enc_round(8, F3, RL0, RR0, load_km, shift_kr, dummy);
+ enc_round(9, F1, RR0, RL0, load_km, shift_kr, dummy);
+ enc_round(10, F2, RL0, RR0, load_km, shift_kr, dummy);
+ enc_round(11, F3, RR0, RL0, load_km, dummy, load_kr);
+ enc_round(12, F1, RL0, RR0, load_km, shift_kr, dummy);
+ enc_round(13, F2, RR0, RL0, load_km, shift_kr, dummy);
+ enc_round(14, F3, RL0, RR0, load_km, shift_kr, dummy);
+ enc_round(15, F1, RR0, RL0, dummy, dummy, dummy);
+
+ ldr %r1, [%sp], #4;
+ write_block(%r1, 0, RR0, RL0, RT0, RT1);
+
+ pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_cast5_arm_encrypt_block,.-_gcry_cast5_arm_encrypt_block;
+
+.align 3
+.globl _gcry_cast5_arm_decrypt_block
+.type _gcry_cast5_arm_decrypt_block,%function;
+
+_gcry_cast5_arm_decrypt_block:
+ /* input:
+ * %r0: CTX
+ * %r1: dst
+ * %r2: src
+ */
+ push {%r1, %r4-%r11, %ip, %lr};
+
+ GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2);
+ mov RMASK, #(0xff << 2);
+ add Rs2, Rs1, #(0x100 * 4);
+ add Rs3, Rs1, #(0x100 * 4 * 2);
+ add Rs4, Rs1, #(0x100 * 4 * 3);
+
+ read_block(%r2, 0, RL0, RR0, RT0);
+
+ load_km(15);
+ load_dec_kr(15);
+ dec_round(15, F1, RL0, RR0, load_km, shift_kr, dummy);
+ dec_round(14, F3, RR0, RL0, load_km, shift_kr, dummy);
+ dec_round(13, F2, RL0, RR0, load_km, shift_kr, dummy);
+ dec_round(12, F1, RR0, RL0, load_km, dummy, load_dec_kr);
+ dec_round(11, F3, RL0, RR0, load_km, shift_kr, dummy);
+ dec_round(10, F2, RR0, RL0, load_km, shift_kr, dummy);
+ dec_round(9, F1, RL0, RR0, load_km, shift_kr, dummy);
+ dec_round(8, F3, RR0, RL0, load_km, dummy, load_dec_kr);
+ dec_round(7, F2, RL0, RR0, load_km, shift_kr, dummy);
+ dec_round(6, F1, RR0, RL0, load_km, shift_kr, dummy);
+ dec_round(5, F3, RL0, RR0, load_km, shift_kr, dummy);
+ dec_round(4, F2, RR0, RL0, load_km, dummy, load_dec_kr);
+ dec_round(3, F1, RL0, RR0, load_km, shift_kr, dummy);
+ dec_round(2, F3, RR0, RL0, load_km, shift_kr, dummy);
+ dec_round(1, F2, RL0, RR0, load_km, shift_kr, dummy);
+ dec_round(0, F1, RR0, RL0, dummy, dummy, dummy);
+
+ ldr %r1, [%sp], #4;
+ write_block(%r1, 0, RR0, RL0, RT0, RT1);
+
+ pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_cast5_arm_decrypt_block,.-_gcry_cast5_arm_decrypt_block;
+
+/**********************************************************************
+ 2-way cast5
+ **********************************************************************/
+
+#define F_2w(n, rl0, rr0, rl1, rr1, op1, op2, op3, op4, dec, loadkm, shiftkr, \
+ loadkr) \
+ op1 RT3, RKM, rr0; \
+ op1 RKM, RKM, rr1; \
+ mov RT3, RT3, ror RKR; \
+ mov RKM, RKM, ror RKR; \
+ \
+ and RT0, RMASK, RT3, ror #(24); \
+ and RT1, RMASK, RT3, lsr #(16); \
+ and RT2, RMASK, RT3, lsr #(8); \
+ and RT3, RMASK, RT3; \
+ \
+ ldr RT0, [Rs1, RT0]; \
+ add RT2, #(0x100 * 4); \
+ ldr RT1, [Rs2, RT1]; \
+ add RT3, #(0x100 * 4 * 2); \
+ \
+ ldr RT2, [Rs2, RT2]; \
+ \
+ op2 RT0, RT1; \
+ ldr RT3, [Rs2, RT3]; \
+ and RT1, RMASK, RKM, ror #(24); \
+ op3 RT0, RT2; \
+ and RT2, RMASK, RKM, lsr #(16); \
+ op4 RT0, RT3; \
+ and RT3, RMASK, RKM, lsr #(8); \
+ eor rl0, RT0; \
+ add RT3, #(0x100 * 4); \
+ ldr RT1, [Rs1, RT1]; \
+ and RT0, RMASK, RKM; \
+ ldr RT2, [Rs2, RT2]; \
+ add RT0, #(0x100 * 4 * 2); \
+ \
+ ldr RT3, [Rs2, RT3]; \
+ \
+ op2 RT1, RT2; \
+ ldr RT0, [Rs2, RT0]; \
+ op3 RT1, RT3; \
+ loadkm((n) + (1 - ((dec) * 2))); \
+ op4 RT1, RT0; \
+ loadkr((n) + (1 - ((dec) * 2))); \
+ shiftkr(RKR); \
+ eor rl1, RT1;
+
+#define F1_2w(n, rl0, rr0, rl1, rr1, dec, loadkm, shiftkr, loadkr) \
+ F_2w(n, rl0, rr0, rl1, rr1, add, eor, sub, add, dec, \
+ loadkm, shiftkr, loadkr)
+#define F2_2w(n, rl0, rr0, rl1, rr1, dec, loadkm, shiftkr, loadkr) \
+ F_2w(n, rl0, rr0, rl1, rr1, eor, sub, add, eor, dec, \
+ loadkm, shiftkr, loadkr)
+#define F3_2w(n, rl0, rr0, rl1, rr1, dec, loadkm, shiftkr, loadkr) \
+ F_2w(n, rl0, rr0, rl1, rr1, sub, add, eor, sub, dec, \
+ loadkm, shiftkr, loadkr)
+
+#define enc_round2(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \
+ Fx##_2w(n, rl##0, rr##0, rl##1, rr##1, 0, loadkm, shiftkr, loadkr)
+
+#define dec_round2(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \
+ Fx##_2w(n, rl##0, rr##0, rl##1, rr##1, 1, loadkm, shiftkr, loadkr)
+
+#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \
+ ldr l0, [rin, #(0)]; \
+ ldr r0, [rin, #(4)]; \
+ convert(l0, rtmp); \
+ ldr l1, [rin, #(8)]; \
+ convert(r0, rtmp); \
+ ldr r1, [rin, #(12)]; \
+ convert(l1, rtmp); \
+ convert(r1, rtmp);
+
+#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \
+ convert(l0, rtmp); \
+ convert(r0, rtmp); \
+ convert(l1, rtmp); \
+ str l0, [rout, #(0)]; \
+ convert(r1, rtmp); \
+ str r0, [rout, #(4)]; \
+ str l1, [rout, #(8)]; \
+ str r1, [rout, #(12)];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+ /* unaligned word reads allowed */
+ #define read_block2(rin, l0, r0, l1, r1, rtmp0) \
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0)
+
+ #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+ write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0)
+
+ #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0)
+
+ #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+ write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0)
+#else
+ /* need to handle unaligned reads by byte reads */
+ #define read_block2(rin, l0, r0, l1, r1, rtmp0) \
+ tst rin, #3; \
+ beq 1f; \
+ ldr_unaligned_be(l0, rin, 0, rtmp0); \
+ ldr_unaligned_be(r0, rin, 4, rtmp0); \
+ ldr_unaligned_be(l1, rin, 8, rtmp0); \
+ ldr_unaligned_be(r1, rin, 12, rtmp0); \
+ b 2f; \
+ 1:;\
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0); \
+ 2:;
+
+ #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+ tst rout, #3; \
+ beq 1f; \
+ str_unaligned_be(l0, rout, 0, rtmp0, rtmp1); \
+ str_unaligned_be(r0, rout, 4, rtmp0, rtmp1); \
+ str_unaligned_be(l1, rout, 8, rtmp0, rtmp1); \
+ str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \
+ b 2f; \
+ 1:;\
+ write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0); \
+ 2:;
+
+ #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
+ tst rin, #3; \
+ beq 1f; \
+ ldr_unaligned_host(l0, rin, 0, rtmp0); \
+ ldr_unaligned_host(r0, rin, 4, rtmp0); \
+ ldr_unaligned_host(l1, rin, 8, rtmp0); \
+ ldr_unaligned_host(r1, rin, 12, rtmp0); \
+ b 2f; \
+ 1:;\
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0); \
+ 2:;
+
+ #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+ tst rout, #3; \
+ beq 1f; \
+ str_unaligned_host(l0, rout, 0, rtmp0, rtmp1); \
+ str_unaligned_host(r0, rout, 4, rtmp0, rtmp1); \
+ str_unaligned_host(l1, rout, 8, rtmp0, rtmp1); \
+ str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \
+ b 2f; \
+ 1:;\
+ write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0); \
+ 2:;
+#endif
+
+.align 3
+.type _gcry_cast5_arm_enc_blk2,%function;
+
+_gcry_cast5_arm_enc_blk2:
+ /* input:
+ * preloaded: CTX
+ * [RL0, RR0], [RL1, RR1]: src
+ * output:
+ * [RR0, RL0], [RR1, RL1]: dst
+ */
+ push {%lr};
+
+ GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2);
+ mov RMASK, #(0xff << 2);
+ add Rs2, Rs1, #(0x100 * 4);
+
+ load_km(0);
+ load_kr(0);
+ enc_round2(0, F1, RL, RR, load_km, shift_kr, dummy);
+ enc_round2(1, F2, RR, RL, load_km, shift_kr, dummy);
+ enc_round2(2, F3, RL, RR, load_km, shift_kr, dummy);
+ enc_round2(3, F1, RR, RL, load_km, dummy, load_kr);
+ enc_round2(4, F2, RL, RR, load_km, shift_kr, dummy);
+ enc_round2(5, F3, RR, RL, load_km, shift_kr, dummy);
+ enc_round2(6, F1, RL, RR, load_km, shift_kr, dummy);
+ enc_round2(7, F2, RR, RL, load_km, dummy, load_kr);
+ enc_round2(8, F3, RL, RR, load_km, shift_kr, dummy);
+ enc_round2(9, F1, RR, RL, load_km, shift_kr, dummy);
+ enc_round2(10, F2, RL, RR, load_km, shift_kr, dummy);
+ enc_round2(11, F3, RR, RL, load_km, dummy, load_kr);
+ enc_round2(12, F1, RL, RR, load_km, shift_kr, dummy);
+ enc_round2(13, F2, RR, RL, load_km, shift_kr, dummy);
+ enc_round2(14, F3, RL, RR, load_km, shift_kr, dummy);
+ enc_round2(15, F1, RR, RL, dummy, dummy, dummy);
+
+ host_to_be(RR0, RT0);
+ host_to_be(RL0, RT0);
+ host_to_be(RR1, RT0);
+ host_to_be(RL1, RT0);
+
+ pop {%pc};
+.ltorg
+.size _gcry_cast5_arm_enc_blk2,.-_gcry_cast5_arm_enc_blk2;
+
+.align 3
+.globl _gcry_cast5_arm_cfb_dec;
+.type _gcry_cast5_arm_cfb_dec,%function;
+
+_gcry_cast5_arm_cfb_dec:
+ /* input:
+ * %r0: CTX
+ * %r1: dst (2 blocks)
+ * %r2: src (2 blocks)
+ * %r3: iv (64bit)
+ */
+ push {%r1, %r2, %r4-%r11, %ip, %lr};
+
+ mov %lr, %r3;
+
+ /* Load input (iv/%r3 is aligned, src/%r2 might not be) */
+ ldm %r3, {RL0, RR0};
+ host_to_be(RL0, RT1);
+ host_to_be(RR0, RT1);
+ read_block(%r2, 0, RL1, RR1, %ip);
+
+ /* Update IV, load src[1] and save to iv[0] */
+ read_block_host(%r2, 8, %r5, %r6, %r7);
+ stm %lr, {%r5, %r6};
+
+ bl _gcry_cast5_arm_enc_blk2;
+ /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+ /* %r0: dst, %r1: src */
+ pop {%r0, %r1};
+
+ /* dst = src ^ result */
+ read_block2_host(%r1, %r5, %r6, %r7, %r8, %lr);
+ eor %r5, %r4;
+ eor %r6, %r3;
+ eor %r7, %r10;
+ eor %r8, %r9;
+ write_block2_host(%r0, %r5, %r6, %r7, %r8, %r1, %r2);
+
+ pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_cast5_arm_cfb_dec,.-_gcry_cast5_arm_cfb_dec;
+
+.align 3
+.globl _gcry_cast5_arm_ctr_enc;
+.type _gcry_cast5_arm_ctr_enc,%function;
+
+_gcry_cast5_arm_ctr_enc:
+ /* input:
+ * %r0: CTX
+ * %r1: dst (2 blocks)
+ * %r2: src (2 blocks)
+ * %r3: iv (64bit, big-endian)
+ */
+ push {%r1, %r2, %r4-%r11, %ip, %lr};
+
+ mov %lr, %r3;
+
+ /* Load IV (big => host endian) */
+ read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT1);
+
+ /* Construct IVs */
+ adds RR1, RR0, #1; /* +1 */
+ adc RL1, RL0, #0;
+ adds %r6, RR1, #1; /* +2 */
+ adc %r5, RL1, #0;
+
+ /* Store new IV (host => big-endian) */
+ write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT1);
+
+ bl _gcry_cast5_arm_enc_blk2;
+ /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+ /* %r0: dst, %r1: src */
+ pop {%r0, %r1};
+
+ /* XOR key-stream with plaintext */
+ read_block2_host(%r1, %r5, %r6, %r7, %r8, %lr);
+ eor %r5, %r4;
+ eor %r6, %r3;
+ eor %r7, %r10;
+ eor %r8, %r9;
+ write_block2_host(%r0, %r5, %r6, %r7, %r8, %r1, %r2);
+
+ pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_cast5_arm_ctr_enc,.-_gcry_cast5_arm_ctr_enc;
+
+.align 3
+.type _gcry_cast5_arm_dec_blk2,%function;
+
+_gcry_cast5_arm_dec_blk2:
+ /* input:
+ * preloaded: CTX
+ * [RL0, RR0], [RL1, RR1]: src
+ * output:
+ * [RR0, RL0], [RR1, RL1]: dst
+ */
+
+ GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2);
+ mov RMASK, #(0xff << 2);
+ add Rs2, Rs1, #(0x100 * 4);
+
+ load_km(15);
+ load_dec_kr(15);
+ dec_round2(15, F1, RL, RR, load_km, shift_kr, dummy);
+ dec_round2(14, F3, RR, RL, load_km, shift_kr, dummy);
+ dec_round2(13, F2, RL, RR, load_km, shift_kr, dummy);
+ dec_round2(12, F1, RR, RL, load_km, dummy, load_dec_kr);
+ dec_round2(11, F3, RL, RR, load_km, shift_kr, dummy);
+ dec_round2(10, F2, RR, RL, load_km, shift_kr, dummy);
+ dec_round2(9, F1, RL, RR, load_km, shift_kr, dummy);
+ dec_round2(8, F3, RR, RL, load_km, dummy, load_dec_kr);
+ dec_round2(7, F2, RL, RR, load_km, shift_kr, dummy);
+ dec_round2(6, F1, RR, RL, load_km, shift_kr, dummy);
+ dec_round2(5, F3, RL, RR, load_km, shift_kr, dummy);
+ dec_round2(4, F2, RR, RL, load_km, dummy, load_dec_kr);
+ dec_round2(3, F1, RL, RR, load_km, shift_kr, dummy);
+ dec_round2(2, F3, RR, RL, load_km, shift_kr, dummy);
+ dec_round2(1, F2, RL, RR, load_km, shift_kr, dummy);
+ dec_round2(0, F1, RR, RL, dummy, dummy, dummy);
+
+ host_to_be(RR0, RT0);
+ host_to_be(RL0, RT0);
+ host_to_be(RR1, RT0);
+ host_to_be(RL1, RT0);
+
+ b .Ldec_cbc_tail;
+.ltorg
+.size _gcry_cast5_arm_dec_blk2,.-_gcry_cast5_arm_dec_blk2;
+
+.align 3
+.globl _gcry_cast5_arm_cbc_dec;
+.type _gcry_cast5_arm_cbc_dec,%function;
+
+_gcry_cast5_arm_cbc_dec:
+ /* input:
+ * %r0: CTX
+ * %r1: dst (2 blocks)
+ * %r2: src (2 blocks)
+ * %r3: iv (64bit)
+ */
+ push {%r1-%r11, %ip, %lr};
+
+ read_block2(%r2, RL0, RR0, RL1, RR1, RT0);
+
+ /* dec_blk2 is only used by cbc_dec, jump directly in/out instead
+ * of function call. */
+ b _gcry_cast5_arm_dec_blk2;
+.Ldec_cbc_tail:
+ /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+ /* %r0: dst, %r1: src, %r2: iv */
+ pop {%r0-%r2};
+
+ /* load IV+1 (src[0]) to %r7:%r8. Might be unaligned. */
+ read_block_host(%r1, 0, %r7, %r8, %r5);
+ /* load IV (iv[0]) to %r5:%r6. 'iv' is aligned. */
+ ldm %r2, {%r5, %r6};
+
+ /* out[1] ^= IV+1 */
+ eor %r10, %r7;
+ eor %r9, %r8;
+ /* out[0] ^= IV */
+ eor %r4, %r5;
+ eor %r3, %r6;
+
+ /* load IV+2 (src[1]) to %r7:%r8. Might be unaligned. */
+ read_block_host(%r1, 8, %r7, %r8, %r5);
+ /* store IV+2 to iv[0] (aligned). */
+ stm %r2, {%r7, %r8};
+
+ /* store result to dst[0-3]. Might be unaligned. */
+ write_block2_host(%r0, %r4, %r3, %r10, %r9, %r5, %r6);
+
+ pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_cast5_arm_cbc_dec,.-_gcry_cast5_arm_cbc_dec;
+
+#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/
+#endif /*__ARMEL__*/
diff --git a/comm/third_party/libgcrypt/cipher/cast5.c b/comm/third_party/libgcrypt/cipher/cast5.c
new file mode 100644
index 0000000000..837ea0fe57
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cast5.c
@@ -0,0 +1,1238 @@
+/* cast5.c - CAST5 cipher (RFC2144)
+ * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+/* Test vectors:
+ *
+ * 128-bit key = 01 23 45 67 12 34 56 78 23 45 67 89 34 56 78 9A
+ * plaintext = 01 23 45 67 89 AB CD EF
+ * ciphertext = 23 8B 4F E5 84 7E 44 B2
+ *
+ * 80-bit key = 01 23 45 67 12 34 56 78 23 45
+ * = 01 23 45 67 12 34 56 78 23 45 00 00 00 00 00 00
+ * plaintext = 01 23 45 67 89 AB CD EF
+ * ciphertext = EB 6A 71 1A 2C 02 27 1B
+ *
+ * 40-bit key = 01 23 45 67 12
+ * = 01 23 45 67 12 00 00 00 00 00 00 00 00 00 00 00
+ * plaintext = 01 23 45 67 89 AB CD EF
+ * ciphertext = 7A C8 16 D1 6E 9B 30 2E
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "g10lib.h"
+#include "types.h"
+#include "cipher.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
+# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+# define USE_ARM_ASM 1
+# endif
+#endif
+
+#define CAST5_BLOCKSIZE 8
+
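+/* Km holds the sixteen 32-bit masking subkeys and Kr the sixteen rotation
+ * subkeys defined by RFC 2144; the Kr_arm_enc/Kr_arm_dec members are extra
+ * copies of the rotation subkeys laid out for the ARM assembly code. */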
+typedef struct {
+ u32 Km[16];
+ byte Kr[16];
+#ifdef USE_ARM_ASM
+ u32 Kr_arm_enc[16 / sizeof(u32)];
+ u32 Kr_arm_dec[16 / sizeof(u32)];
+#endif
+} CAST5_context;
+
+static gcry_err_code_t cast_setkey (void *c, const byte *key, unsigned keylen,
+ cipher_bulk_ops_t *bulk_ops);
+static unsigned int encrypt_block (void *c, byte *outbuf, const byte *inbuf);
+static unsigned int decrypt_block (void *c, byte *outbuf, const byte *inbuf);
+
+
+
+#define s1 _gcry_cast5_s1to4[0]
+#define s2 _gcry_cast5_s1to4[1]
+#define s3 _gcry_cast5_s1to4[2]
+#define s4 _gcry_cast5_s1to4[3]
+
+const u32 _gcry_cast5_s1to4[4][256] = { {
+0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f, 0x9c004dd3, 0x6003e540, 0xcf9fc949,
+0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0, 0x15c361d2, 0xc2e7661d, 0x22d4ff8e,
+0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, 0x43c340d3, 0xdf2f8656, 0x887ca41a, 0xa2d2bd2d,
+0xa1c9e0d6, 0x346c4819, 0x61b76d87, 0x22540f2f, 0x2abe32e1, 0xaa54166b, 0x22568e3a, 0xa2d341d0,
+0x66db40c8, 0xa784392f, 0x004dff2f, 0x2db9d2de, 0x97943fac, 0x4a97c1d8, 0x527644b7, 0xb5f437a7,
+0xb82cbaef, 0xd751d159, 0x6ff7f0ed, 0x5a097a1f, 0x827b68d0, 0x90ecf52e, 0x22b0c054, 0xbc8e5935,
+0x4b6d2f7f, 0x50bb64a2, 0xd2664910, 0xbee5812d, 0xb7332290, 0xe93b159f, 0xb48ee411, 0x4bff345d,
+0xfd45c240, 0xad31973f, 0xc4f6d02e, 0x55fc8165, 0xd5b1caad, 0xa1ac2dae, 0xa2d4b76d, 0xc19b0c50,
+0x882240f2, 0x0c6e4f38, 0xa4e4bfd7, 0x4f5ba272, 0x564c1d2f, 0xc59c5319, 0xb949e354, 0xb04669fe,
+0xb1b6ab8a, 0xc71358dd, 0x6385c545, 0x110f935d, 0x57538ad5, 0x6a390493, 0xe63d37e0, 0x2a54f6b3,
+0x3a787d5f, 0x6276a0b5, 0x19a6fcdf, 0x7a42206a, 0x29f9d4d5, 0xf61b1891, 0xbb72275e, 0xaa508167,
+0x38901091, 0xc6b505eb, 0x84c7cb8c, 0x2ad75a0f, 0x874a1427, 0xa2d1936b, 0x2ad286af, 0xaa56d291,
+0xd7894360, 0x425c750d, 0x93b39e26, 0x187184c9, 0x6c00b32d, 0x73e2bb14, 0xa0bebc3c, 0x54623779,
+0x64459eab, 0x3f328b82, 0x7718cf82, 0x59a2cea6, 0x04ee002e, 0x89fe78e6, 0x3fab0950, 0x325ff6c2,
+0x81383f05, 0x6963c5c8, 0x76cb5ad6, 0xd49974c9, 0xca180dcf, 0x380782d5, 0xc7fa5cf6, 0x8ac31511,
+0x35e79e13, 0x47da91d0, 0xf40f9086, 0xa7e2419e, 0x31366241, 0x051ef495, 0xaa573b04, 0x4a805d8d,
+0x548300d0, 0x00322a3c, 0xbf64cddf, 0xba57a68e, 0x75c6372b, 0x50afd341, 0xa7c13275, 0x915a0bf5,
+0x6b54bfab, 0x2b0b1426, 0xab4cc9d7, 0x449ccd82, 0xf7fbf265, 0xab85c5f3, 0x1b55db94, 0xaad4e324,
+0xcfa4bd3f, 0x2deaa3e2, 0x9e204d02, 0xc8bd25ac, 0xeadf55b3, 0xd5bd9e98, 0xe31231b2, 0x2ad5ad6c,
+0x954329de, 0xadbe4528, 0xd8710f69, 0xaa51c90f, 0xaa786bf6, 0x22513f1e, 0xaa51a79b, 0x2ad344cc,
+0x7b5a41f0, 0xd37cfbad, 0x1b069505, 0x41ece491, 0xb4c332e6, 0x032268d4, 0xc9600acc, 0xce387e6d,
+0xbf6bb16c, 0x6a70fb78, 0x0d03d9c9, 0xd4df39de, 0xe01063da, 0x4736f464, 0x5ad328d8, 0xb347cc96,
+0x75bb0fc3, 0x98511bfb, 0x4ffbcc35, 0xb58bcf6a, 0xe11f0abc, 0xbfc5fe4a, 0xa70aec10, 0xac39570a,
+0x3f04442f, 0x6188b153, 0xe0397a2e, 0x5727cb79, 0x9ceb418f, 0x1cacd68d, 0x2ad37c96, 0x0175cb9d,
+0xc69dff09, 0xc75b65f0, 0xd9db40d8, 0xec0e7779, 0x4744ead4, 0xb11c3274, 0xdd24cb9e, 0x7e1c54bd,
+0xf01144f9, 0xd2240eb1, 0x9675b3fd, 0xa3ac3755, 0xd47c27af, 0x51c85f4d, 0x56907596, 0xa5bb15e6,
+0x580304f0, 0xca042cf1, 0x011a37ea, 0x8dbfaadb, 0x35ba3e4a, 0x3526ffa0, 0xc37b4d09, 0xbc306ed9,
+0x98a52666, 0x5648f725, 0xff5e569d, 0x0ced63d0, 0x7c63b2cf, 0x700b45e1, 0xd5ea50f1, 0x85a92872,
+0xaf1fbda7, 0xd4234870, 0xa7870bf3, 0x2d3b4d79, 0x42e04198, 0x0cd0ede7, 0x26470db8, 0xf881814c,
+0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, 0xf5d2f4db, 0xab838653, 0x6e2f1e23, 0x83719c9e,
+0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c, 0xe1e696ff, 0xb141ab08, 0x7cca89b9,
+0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c, 0x5ac9f049, 0xdd8f0f00, 0x5c8165bf
+}, {
+0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a, 0xeec5207a, 0x55889c94, 0x72fc0651,
+0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef, 0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3,
+0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, 0xdc440086, 0xef944459, 0xba83ccb3, 0xe0c3cdfb,
+0xd1da4181, 0x3b092ab1, 0xf997f1c1, 0xa5e6cf7b, 0x01420ddb, 0xe4e7ef5b, 0x25a1ff41, 0xe180f806,
+0x1fc41080, 0x179bee7a, 0xd37ac6a9, 0xfe5830a4, 0x98de8b7f, 0x77e83f4e, 0x79929269, 0x24fa9f7b,
+0xe113c85b, 0xacc40083, 0xd7503525, 0xf7ea615f, 0x62143154, 0x0d554b63, 0x5d681121, 0xc866c359,
+0x3d63cf73, 0xcee234c0, 0xd4d87e87, 0x5c672b21, 0x071f6181, 0x39f7627f, 0x361e3084, 0xe4eb573b,
+0x602f64a4, 0xd63acd9c, 0x1bbc4635, 0x9e81032d, 0x2701f50c, 0x99847ab4, 0xa0e3df79, 0xba6cf38c,
+0x10843094, 0x2537a95e, 0xf46f6ffe, 0xa1ff3b1f, 0x208cfb6a, 0x8f458c74, 0xd9e0a227, 0x4ec73a34,
+0xfc884f69, 0x3e4de8df, 0xef0e0088, 0x3559648d, 0x8a45388c, 0x1d804366, 0x721d9bfd, 0xa58684bb,
+0xe8256333, 0x844e8212, 0x128d8098, 0xfed33fb4, 0xce280ae1, 0x27e19ba5, 0xd5a6c252, 0xe49754bd,
+0xc5d655dd, 0xeb667064, 0x77840b4d, 0xa1b6a801, 0x84db26a9, 0xe0b56714, 0x21f043b7, 0xe5d05860,
+0x54f03084, 0x066ff472, 0xa31aa153, 0xdadc4755, 0xb5625dbf, 0x68561be6, 0x83ca6b94, 0x2d6ed23b,
+0xeccf01db, 0xa6d3d0ba, 0xb6803d5c, 0xaf77a709, 0x33b4a34c, 0x397bc8d6, 0x5ee22b95, 0x5f0e5304,
+0x81ed6f61, 0x20e74364, 0xb45e1378, 0xde18639b, 0x881ca122, 0xb96726d1, 0x8049a7e8, 0x22b7da7b,
+0x5e552d25, 0x5272d237, 0x79d2951c, 0xc60d894c, 0x488cb402, 0x1ba4fe5b, 0xa4b09f6b, 0x1ca815cf,
+0xa20c3005, 0x8871df63, 0xb9de2fcb, 0x0cc6c9e9, 0x0beeff53, 0xe3214517, 0xb4542835, 0x9f63293c,
+0xee41e729, 0x6e1d2d7c, 0x50045286, 0x1e6685f3, 0xf33401c6, 0x30a22c95, 0x31a70850, 0x60930f13,
+0x73f98417, 0xa1269859, 0xec645c44, 0x52c877a9, 0xcdff33a6, 0xa02b1741, 0x7cbad9a2, 0x2180036f,
+0x50d99c08, 0xcb3f4861, 0xc26bd765, 0x64a3f6ab, 0x80342676, 0x25a75e7b, 0xe4e6d1fc, 0x20c710e6,
+0xcdf0b680, 0x17844d3b, 0x31eef84d, 0x7e0824e4, 0x2ccb49eb, 0x846a3bae, 0x8ff77888, 0xee5d60f6,
+0x7af75673, 0x2fdd5cdb, 0xa11631c1, 0x30f66f43, 0xb3faec54, 0x157fd7fa, 0xef8579cc, 0xd152de58,
+0xdb2ffd5e, 0x8f32ce19, 0x306af97a, 0x02f03ef8, 0x99319ad5, 0xc242fa0f, 0xa7e3ebb0, 0xc68e4906,
+0xb8da230c, 0x80823028, 0xdcdef3c8, 0xd35fb171, 0x088a1bc8, 0xbec0c560, 0x61a3c9e8, 0xbca8f54d,
+0xc72feffa, 0x22822e99, 0x82c570b4, 0xd8d94e89, 0x8b1c34bc, 0x301e16e6, 0x273be979, 0xb0ffeaa6,
+0x61d9b8c6, 0x00b24869, 0xb7ffce3f, 0x08dc283b, 0x43daf65a, 0xf7e19798, 0x7619b72f, 0x8f1c9ba4,
+0xdc8637a0, 0x16a7d3b1, 0x9fc393b7, 0xa7136eeb, 0xc6bcc63e, 0x1a513742, 0xef6828bc, 0x520365d6,
+0x2d6a77ab, 0x3527ed4b, 0x821fd216, 0x095c6e2e, 0xdb92f2fb, 0x5eea29cb, 0x145892f5, 0x91584f7f,
+0x5483697b, 0x2667a8cc, 0x85196048, 0x8c4bacea, 0x833860d4, 0x0d23e0f9, 0x6c387e8a, 0x0ae6d249,
+0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, 0x3ebd81b3, 0x230eabb0, 0x6438bc87, 0xf0b5b1fa,
+0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589, 0xa345415e, 0x5c038323, 0x3e5d3bb9,
+0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539, 0x73bfbe70, 0x83877605, 0x4523ecf1
+}, {
+0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff, 0x369fe44b, 0x8c1fc644, 0xaececa90,
+0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806, 0xf0ad0548, 0xe13c8d83, 0x927010d5,
+0x11107d9f, 0x07647db9, 0xb2e3e4d4, 0x3d4f285e, 0xb9afa820, 0xfade82e0, 0xa067268b, 0x8272792e,
+0x553fb2c0, 0x489ae22b, 0xd4ef9794, 0x125e3fbc, 0x21fffcee, 0x825b1bfd, 0x9255c5ed, 0x1257a240,
+0x4e1a8302, 0xbae07fff, 0x528246e7, 0x8e57140e, 0x3373f7bf, 0x8c9f8188, 0xa6fc4ee8, 0xc982b5a5,
+0xa8c01db7, 0x579fc264, 0x67094f31, 0xf2bd3f5f, 0x40fff7c1, 0x1fb78dfc, 0x8e6bd2c1, 0x437be59b,
+0x99b03dbf, 0xb5dbc64b, 0x638dc0e6, 0x55819d99, 0xa197c81c, 0x4a012d6e, 0xc5884a28, 0xccc36f71,
+0xb843c213, 0x6c0743f1, 0x8309893c, 0x0feddd5f, 0x2f7fe850, 0xd7c07f7e, 0x02507fbf, 0x5afb9a04,
+0xa747d2d0, 0x1651192e, 0xaf70bf3e, 0x58c31380, 0x5f98302e, 0x727cc3c4, 0x0a0fb402, 0x0f7fef82,
+0x8c96fdad, 0x5d2c2aae, 0x8ee99a49, 0x50da88b8, 0x8427f4a0, 0x1eac5790, 0x796fb449, 0x8252dc15,
+0xefbd7d9b, 0xa672597d, 0xada840d8, 0x45f54504, 0xfa5d7403, 0xe83ec305, 0x4f91751a, 0x925669c2,
+0x23efe941, 0xa903f12e, 0x60270df2, 0x0276e4b6, 0x94fd6574, 0x927985b2, 0x8276dbcb, 0x02778176,
+0xf8af918d, 0x4e48f79e, 0x8f616ddf, 0xe29d840e, 0x842f7d83, 0x340ce5c8, 0x96bbb682, 0x93b4b148,
+0xef303cab, 0x984faf28, 0x779faf9b, 0x92dc560d, 0x224d1e20, 0x8437aa88, 0x7d29dc96, 0x2756d3dc,
+0x8b907cee, 0xb51fd240, 0xe7c07ce3, 0xe566b4a1, 0xc3e9615e, 0x3cf8209d, 0x6094d1e3, 0xcd9ca341,
+0x5c76460e, 0x00ea983b, 0xd4d67881, 0xfd47572c, 0xf76cedd9, 0xbda8229c, 0x127dadaa, 0x438a074e,
+0x1f97c090, 0x081bdb8a, 0x93a07ebe, 0xb938ca15, 0x97b03cff, 0x3dc2c0f8, 0x8d1ab2ec, 0x64380e51,
+0x68cc7bfb, 0xd90f2788, 0x12490181, 0x5de5ffd4, 0xdd7ef86a, 0x76a2e214, 0xb9a40368, 0x925d958f,
+0x4b39fffa, 0xba39aee9, 0xa4ffd30b, 0xfaf7933b, 0x6d498623, 0x193cbcfa, 0x27627545, 0x825cf47a,
+0x61bd8ba0, 0xd11e42d1, 0xcead04f4, 0x127ea392, 0x10428db7, 0x8272a972, 0x9270c4a8, 0x127de50b,
+0x285ba1c8, 0x3c62f44f, 0x35c0eaa5, 0xe805d231, 0x428929fb, 0xb4fcdf82, 0x4fb66a53, 0x0e7dc15b,
+0x1f081fab, 0x108618ae, 0xfcfd086d, 0xf9ff2889, 0x694bcc11, 0x236a5cae, 0x12deca4d, 0x2c3f8cc5,
+0xd2d02dfe, 0xf8ef5896, 0xe4cf52da, 0x95155b67, 0x494a488c, 0xb9b6a80c, 0x5c8f82bc, 0x89d36b45,
+0x3a609437, 0xec00c9a9, 0x44715253, 0x0a874b49, 0xd773bc40, 0x7c34671c, 0x02717ef6, 0x4feb5536,
+0xa2d02fff, 0xd2bf60c4, 0xd43f03c0, 0x50b4ef6d, 0x07478cd1, 0x006e1888, 0xa2e53f55, 0xb9e6d4bc,
+0xa2048016, 0x97573833, 0xd7207d67, 0xde0f8f3d, 0x72f87b33, 0xabcc4f33, 0x7688c55d, 0x7b00a6b0,
+0x947b0001, 0x570075d2, 0xf9bb88f8, 0x8942019e, 0x4264a5ff, 0x856302e0, 0x72dbd92b, 0xee971b69,
+0x6ea22fde, 0x5f08ae2b, 0xaf7a616d, 0xe5c98767, 0xcf1febd2, 0x61efc8c2, 0xf1ac2571, 0xcc8239c2,
+0x67214cb8, 0xb1e583d1, 0xb7dc3e62, 0x7f10bdce, 0xf90a5c38, 0x0ff0443d, 0x606e6dc6, 0x60543a49,
+0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, 0xaf96da0f, 0x68458425, 0x99833be5, 0x600d457d,
+0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31, 0x9c305a00, 0x52bce688, 0x1b03588a,
+0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636, 0xa133c501, 0xe9d3531c, 0xee353783
+}, {
+0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb, 0x64ad8c57, 0x85510443, 0xfa020ed1,
+0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43, 0x6497b7b1, 0xf3641f63, 0x241e4adf,
+0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, 0xfdd30b30, 0xc0a5374f, 0x1d2d00d9, 0x24147b15,
+0xee4d111a, 0x0fca5167, 0x71ff904c, 0x2d195ffe, 0x1a05645f, 0x0c13fefe, 0x081b08ca, 0x05170121,
+0x80530100, 0xe83e5efe, 0xac9af4f8, 0x7fe72701, 0xd2b8ee5f, 0x06df4261, 0xbb9e9b8a, 0x7293ea25,
+0xce84ffdf, 0xf5718801, 0x3dd64b04, 0xa26f263b, 0x7ed48400, 0x547eebe6, 0x446d4ca0, 0x6cf3d6f5,
+0x2649abdf, 0xaea0c7f5, 0x36338cc1, 0x503f7e93, 0xd3772061, 0x11b638e1, 0x72500e03, 0xf80eb2bb,
+0xabe0502e, 0xec8d77de, 0x57971e81, 0xe14f6746, 0xc9335400, 0x6920318f, 0x081dbb99, 0xffc304a5,
+0x4d351805, 0x7f3d5ce3, 0xa6c866c6, 0x5d5bcca9, 0xdaec6fea, 0x9f926f91, 0x9f46222f, 0x3991467d,
+0xa5bf6d8e, 0x1143c44f, 0x43958302, 0xd0214eeb, 0x022083b8, 0x3fb6180c, 0x18f8931e, 0x281658e6,
+0x26486e3e, 0x8bd78a70, 0x7477e4c1, 0xb506e07c, 0xf32d0a25, 0x79098b02, 0xe4eabb81, 0x28123b23,
+0x69dead38, 0x1574ca16, 0xdf871b62, 0x211c40b7, 0xa51a9ef9, 0x0014377b, 0x041e8ac8, 0x09114003,
+0xbd59e4d2, 0xe3d156d5, 0x4fe876d5, 0x2f91a340, 0x557be8de, 0x00eae4a7, 0x0ce5c2ec, 0x4db4bba6,
+0xe756bdff, 0xdd3369ac, 0xec17b035, 0x06572327, 0x99afc8b0, 0x56c8c391, 0x6b65811c, 0x5e146119,
+0x6e85cb75, 0xbe07c002, 0xc2325577, 0x893ff4ec, 0x5bbfc92d, 0xd0ec3b25, 0xb7801ab7, 0x8d6d3b24,
+0x20c763ef, 0xc366a5fc, 0x9c382880, 0x0ace3205, 0xaac9548a, 0xeca1d7c7, 0x041afa32, 0x1d16625a,
+0x6701902c, 0x9b757a54, 0x31d477f7, 0x9126b031, 0x36cc6fdb, 0xc70b8b46, 0xd9e66a48, 0x56e55a79,
+0x026a4ceb, 0x52437eff, 0x2f8f76b4, 0x0df980a5, 0x8674cde3, 0xedda04eb, 0x17a9be04, 0x2c18f4df,
+0xb7747f9d, 0xab2af7b4, 0xefc34d20, 0x2e096b7c, 0x1741a254, 0xe5b6a035, 0x213d42f6, 0x2c1c7c26,
+0x61c2f50f, 0x6552daf9, 0xd2c231f8, 0x25130f69, 0xd8167fa2, 0x0418f2c8, 0x001a96a6, 0x0d1526ab,
+0x63315c21, 0x5e0a72ec, 0x49bafefd, 0x187908d9, 0x8d0dbd86, 0x311170a7, 0x3e9b640c, 0xcc3e10d7,
+0xd5cad3b6, 0x0caec388, 0xf73001e1, 0x6c728aff, 0x71eae2a1, 0x1f9af36e, 0xcfcbd12f, 0xc1de8417,
+0xac07be6b, 0xcb44a1d8, 0x8b9b0f56, 0x013988c3, 0xb1c52fca, 0xb4be31cd, 0xd8782806, 0x12a3a4e2,
+0x6f7de532, 0x58fd7eb6, 0xd01ee900, 0x24adffc2, 0xf4990fc5, 0x9711aac5, 0x001d7b95, 0x82e5e7d2,
+0x109873f6, 0x00613096, 0xc32d9521, 0xada121ff, 0x29908415, 0x7fbb977f, 0xaf9eb3db, 0x29c9ed2a,
+0x5ce2a465, 0xa730f32c, 0xd0aa3fe8, 0x8a5cc091, 0xd49e2ce7, 0x0ce454a9, 0xd60acd86, 0x015f1919,
+0x77079103, 0xdea03af6, 0x78a8565e, 0xdee356df, 0x21f05cbe, 0x8b75e387, 0xb3c50651, 0xb8a5c3ef,
+0xd8eeb6d2, 0xe523be77, 0xc2154529, 0x2f69efdf, 0xafe67afb, 0xf470c4b2, 0xf3e0eb5b, 0xd6cc9876,
+0x39e4460c, 0x1fda8538, 0x1987832f, 0xca007367, 0xa99144f8, 0x296b299e, 0x492fc295, 0x9266beab,
+0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, 0x1b5e51ee, 0xf65324e6, 0x6afce36c, 0x0316cc04,
+0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, 0x41823979, 0x932bcdf6, 0xb657c34d, 0x4edfd282,
+0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0, 0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2
+} };
+static const u32 s5[256] = {
+0x7ec90c04, 0x2c6e74b9, 0x9b0e66df, 0xa6337911, 0xb86a7fff, 0x1dd358f5, 0x44dd9d44, 0x1731167f,
+0x08fbf1fa, 0xe7f511cc, 0xd2051b00, 0x735aba00, 0x2ab722d8, 0x386381cb, 0xacf6243a, 0x69befd7a,
+0xe6a2e77f, 0xf0c720cd, 0xc4494816, 0xccf5c180, 0x38851640, 0x15b0a848, 0xe68b18cb, 0x4caadeff,
+0x5f480a01, 0x0412b2aa, 0x259814fc, 0x41d0efe2, 0x4e40b48d, 0x248eb6fb, 0x8dba1cfe, 0x41a99b02,
+0x1a550a04, 0xba8f65cb, 0x7251f4e7, 0x95a51725, 0xc106ecd7, 0x97a5980a, 0xc539b9aa, 0x4d79fe6a,
+0xf2f3f763, 0x68af8040, 0xed0c9e56, 0x11b4958b, 0xe1eb5a88, 0x8709e6b0, 0xd7e07156, 0x4e29fea7,
+0x6366e52d, 0x02d1c000, 0xc4ac8e05, 0x9377f571, 0x0c05372a, 0x578535f2, 0x2261be02, 0xd642a0c9,
+0xdf13a280, 0x74b55bd2, 0x682199c0, 0xd421e5ec, 0x53fb3ce8, 0xc8adedb3, 0x28a87fc9, 0x3d959981,
+0x5c1ff900, 0xfe38d399, 0x0c4eff0b, 0x062407ea, 0xaa2f4fb1, 0x4fb96976, 0x90c79505, 0xb0a8a774,
+0xef55a1ff, 0xe59ca2c2, 0xa6b62d27, 0xe66a4263, 0xdf65001f, 0x0ec50966, 0xdfdd55bc, 0x29de0655,
+0x911e739a, 0x17af8975, 0x32c7911c, 0x89f89468, 0x0d01e980, 0x524755f4, 0x03b63cc9, 0x0cc844b2,
+0xbcf3f0aa, 0x87ac36e9, 0xe53a7426, 0x01b3d82b, 0x1a9e7449, 0x64ee2d7e, 0xcddbb1da, 0x01c94910,
+0xb868bf80, 0x0d26f3fd, 0x9342ede7, 0x04a5c284, 0x636737b6, 0x50f5b616, 0xf24766e3, 0x8eca36c1,
+0x136e05db, 0xfef18391, 0xfb887a37, 0xd6e7f7d4, 0xc7fb7dc9, 0x3063fcdf, 0xb6f589de, 0xec2941da,
+0x26e46695, 0xb7566419, 0xf654efc5, 0xd08d58b7, 0x48925401, 0xc1bacb7f, 0xe5ff550f, 0xb6083049,
+0x5bb5d0e8, 0x87d72e5a, 0xab6a6ee1, 0x223a66ce, 0xc62bf3cd, 0x9e0885f9, 0x68cb3e47, 0x086c010f,
+0xa21de820, 0xd18b69de, 0xf3f65777, 0xfa02c3f6, 0x407edac3, 0xcbb3d550, 0x1793084d, 0xb0d70eba,
+0x0ab378d5, 0xd951fb0c, 0xded7da56, 0x4124bbe4, 0x94ca0b56, 0x0f5755d1, 0xe0e1e56e, 0x6184b5be,
+0x580a249f, 0x94f74bc0, 0xe327888e, 0x9f7b5561, 0xc3dc0280, 0x05687715, 0x646c6bd7, 0x44904db3,
+0x66b4f0a3, 0xc0f1648a, 0x697ed5af, 0x49e92ff6, 0x309e374f, 0x2cb6356a, 0x85808573, 0x4991f840,
+0x76f0ae02, 0x083be84d, 0x28421c9a, 0x44489406, 0x736e4cb8, 0xc1092910, 0x8bc95fc6, 0x7d869cf4,
+0x134f616f, 0x2e77118d, 0xb31b2be1, 0xaa90b472, 0x3ca5d717, 0x7d161bba, 0x9cad9010, 0xaf462ba2,
+0x9fe459d2, 0x45d34559, 0xd9f2da13, 0xdbc65487, 0xf3e4f94e, 0x176d486f, 0x097c13ea, 0x631da5c7,
+0x445f7382, 0x175683f4, 0xcdc66a97, 0x70be0288, 0xb3cdcf72, 0x6e5dd2f3, 0x20936079, 0x459b80a5,
+0xbe60e2db, 0xa9c23101, 0xeba5315c, 0x224e42f2, 0x1c5c1572, 0xf6721b2c, 0x1ad2fff3, 0x8c25404e,
+0x324ed72f, 0x4067b7fd, 0x0523138e, 0x5ca3bc78, 0xdc0fd66e, 0x75922283, 0x784d6b17, 0x58ebb16e,
+0x44094f85, 0x3f481d87, 0xfcfeae7b, 0x77b5ff76, 0x8c2302bf, 0xaaf47556, 0x5f46b02a, 0x2b092801,
+0x3d38f5f7, 0x0ca81f36, 0x52af4a8a, 0x66d5e7c0, 0xdf3b0874, 0x95055110, 0x1b5ad7a8, 0xf61ed5ad,
+0x6cf6e479, 0x20758184, 0xd0cefa65, 0x88f7be58, 0x4a046826, 0x0ff6f8f3, 0xa09c7f70, 0x5346aba0,
+0x5ce96c28, 0xe176eda3, 0x6bac307f, 0x376829d2, 0x85360fa9, 0x17e3fe2a, 0x24b79767, 0xf5a96b20,
+0xd6cd2595, 0x68ff1ebf, 0x7555442c, 0xf19f06be, 0xf9e0659a, 0xeeb9491d, 0x34010718, 0xbb30cab8,
+0xe822fe15, 0x88570983, 0x750e6249, 0xda627e55, 0x5e76ffa8, 0xb1534546, 0x6d47de08, 0xefe9e7d4
+};
+static const u32 s6[256] = {
+0xf6fa8f9d, 0x2cac6ce1, 0x4ca34867, 0xe2337f7c, 0x95db08e7, 0x016843b4, 0xeced5cbc, 0x325553ac,
+0xbf9f0960, 0xdfa1e2ed, 0x83f0579d, 0x63ed86b9, 0x1ab6a6b8, 0xde5ebe39, 0xf38ff732, 0x8989b138,
+0x33f14961, 0xc01937bd, 0xf506c6da, 0xe4625e7e, 0xa308ea99, 0x4e23e33c, 0x79cbd7cc, 0x48a14367,
+0xa3149619, 0xfec94bd5, 0xa114174a, 0xeaa01866, 0xa084db2d, 0x09a8486f, 0xa888614a, 0x2900af98,
+0x01665991, 0xe1992863, 0xc8f30c60, 0x2e78ef3c, 0xd0d51932, 0xcf0fec14, 0xf7ca07d2, 0xd0a82072,
+0xfd41197e, 0x9305a6b0, 0xe86be3da, 0x74bed3cd, 0x372da53c, 0x4c7f4448, 0xdab5d440, 0x6dba0ec3,
+0x083919a7, 0x9fbaeed9, 0x49dbcfb0, 0x4e670c53, 0x5c3d9c01, 0x64bdb941, 0x2c0e636a, 0xba7dd9cd,
+0xea6f7388, 0xe70bc762, 0x35f29adb, 0x5c4cdd8d, 0xf0d48d8c, 0xb88153e2, 0x08a19866, 0x1ae2eac8,
+0x284caf89, 0xaa928223, 0x9334be53, 0x3b3a21bf, 0x16434be3, 0x9aea3906, 0xefe8c36e, 0xf890cdd9,
+0x80226dae, 0xc340a4a3, 0xdf7e9c09, 0xa694a807, 0x5b7c5ecc, 0x221db3a6, 0x9a69a02f, 0x68818a54,
+0xceb2296f, 0x53c0843a, 0xfe893655, 0x25bfe68a, 0xb4628abc, 0xcf222ebf, 0x25ac6f48, 0xa9a99387,
+0x53bddb65, 0xe76ffbe7, 0xe967fd78, 0x0ba93563, 0x8e342bc1, 0xe8a11be9, 0x4980740d, 0xc8087dfc,
+0x8de4bf99, 0xa11101a0, 0x7fd37975, 0xda5a26c0, 0xe81f994f, 0x9528cd89, 0xfd339fed, 0xb87834bf,
+0x5f04456d, 0x22258698, 0xc9c4c83b, 0x2dc156be, 0x4f628daa, 0x57f55ec5, 0xe2220abe, 0xd2916ebf,
+0x4ec75b95, 0x24f2c3c0, 0x42d15d99, 0xcd0d7fa0, 0x7b6e27ff, 0xa8dc8af0, 0x7345c106, 0xf41e232f,
+0x35162386, 0xe6ea8926, 0x3333b094, 0x157ec6f2, 0x372b74af, 0x692573e4, 0xe9a9d848, 0xf3160289,
+0x3a62ef1d, 0xa787e238, 0xf3a5f676, 0x74364853, 0x20951063, 0x4576698d, 0xb6fad407, 0x592af950,
+0x36f73523, 0x4cfb6e87, 0x7da4cec0, 0x6c152daa, 0xcb0396a8, 0xc50dfe5d, 0xfcd707ab, 0x0921c42f,
+0x89dff0bb, 0x5fe2be78, 0x448f4f33, 0x754613c9, 0x2b05d08d, 0x48b9d585, 0xdc049441, 0xc8098f9b,
+0x7dede786, 0xc39a3373, 0x42410005, 0x6a091751, 0x0ef3c8a6, 0x890072d6, 0x28207682, 0xa9a9f7be,
+0xbf32679d, 0xd45b5b75, 0xb353fd00, 0xcbb0e358, 0x830f220a, 0x1f8fb214, 0xd372cf08, 0xcc3c4a13,
+0x8cf63166, 0x061c87be, 0x88c98f88, 0x6062e397, 0x47cf8e7a, 0xb6c85283, 0x3cc2acfb, 0x3fc06976,
+0x4e8f0252, 0x64d8314d, 0xda3870e3, 0x1e665459, 0xc10908f0, 0x513021a5, 0x6c5b68b7, 0x822f8aa0,
+0x3007cd3e, 0x74719eef, 0xdc872681, 0x073340d4, 0x7e432fd9, 0x0c5ec241, 0x8809286c, 0xf592d891,
+0x08a930f6, 0x957ef305, 0xb7fbffbd, 0xc266e96f, 0x6fe4ac98, 0xb173ecc0, 0xbc60b42a, 0x953498da,
+0xfba1ae12, 0x2d4bd736, 0x0f25faab, 0xa4f3fceb, 0xe2969123, 0x257f0c3d, 0x9348af49, 0x361400bc,
+0xe8816f4a, 0x3814f200, 0xa3f94043, 0x9c7a54c2, 0xbc704f57, 0xda41e7f9, 0xc25ad33a, 0x54f4a084,
+0xb17f5505, 0x59357cbe, 0xedbd15c8, 0x7f97c5ab, 0xba5ac7b5, 0xb6f6deaf, 0x3a479c3a, 0x5302da25,
+0x653d7e6a, 0x54268d49, 0x51a477ea, 0x5017d55b, 0xd7d25d88, 0x44136c76, 0x0404a8c8, 0xb8e5a121,
+0xb81a928a, 0x60ed5869, 0x97c55b96, 0xeaec991b, 0x29935913, 0x01fdb7f1, 0x088e8dfa, 0x9ab6f6f5,
+0x3b4cbf9f, 0x4a5de3ab, 0xe6051d35, 0xa0e1d855, 0xd36b4cf1, 0xf544edeb, 0xb0e93524, 0xbebb8fbd,
+0xa2d762cf, 0x49c92f54, 0x38b5f331, 0x7128a454, 0x48392905, 0xa65b1db8, 0x851c97bd, 0xd675cf2f
+};
+static const u32 s7[256] = {
+0x85e04019, 0x332bf567, 0x662dbfff, 0xcfc65693, 0x2a8d7f6f, 0xab9bc912, 0xde6008a1, 0x2028da1f,
+0x0227bce7, 0x4d642916, 0x18fac300, 0x50f18b82, 0x2cb2cb11, 0xb232e75c, 0x4b3695f2, 0xb28707de,
+0xa05fbcf6, 0xcd4181e9, 0xe150210c, 0xe24ef1bd, 0xb168c381, 0xfde4e789, 0x5c79b0d8, 0x1e8bfd43,
+0x4d495001, 0x38be4341, 0x913cee1d, 0x92a79c3f, 0x089766be, 0xbaeeadf4, 0x1286becf, 0xb6eacb19,
+0x2660c200, 0x7565bde4, 0x64241f7a, 0x8248dca9, 0xc3b3ad66, 0x28136086, 0x0bd8dfa8, 0x356d1cf2,
+0x107789be, 0xb3b2e9ce, 0x0502aa8f, 0x0bc0351e, 0x166bf52a, 0xeb12ff82, 0xe3486911, 0xd34d7516,
+0x4e7b3aff, 0x5f43671b, 0x9cf6e037, 0x4981ac83, 0x334266ce, 0x8c9341b7, 0xd0d854c0, 0xcb3a6c88,
+0x47bc2829, 0x4725ba37, 0xa66ad22b, 0x7ad61f1e, 0x0c5cbafa, 0x4437f107, 0xb6e79962, 0x42d2d816,
+0x0a961288, 0xe1a5c06e, 0x13749e67, 0x72fc081a, 0xb1d139f7, 0xf9583745, 0xcf19df58, 0xbec3f756,
+0xc06eba30, 0x07211b24, 0x45c28829, 0xc95e317f, 0xbc8ec511, 0x38bc46e9, 0xc6e6fa14, 0xbae8584a,
+0xad4ebc46, 0x468f508b, 0x7829435f, 0xf124183b, 0x821dba9f, 0xaff60ff4, 0xea2c4e6d, 0x16e39264,
+0x92544a8b, 0x009b4fc3, 0xaba68ced, 0x9ac96f78, 0x06a5b79a, 0xb2856e6e, 0x1aec3ca9, 0xbe838688,
+0x0e0804e9, 0x55f1be56, 0xe7e5363b, 0xb3a1f25d, 0xf7debb85, 0x61fe033c, 0x16746233, 0x3c034c28,
+0xda6d0c74, 0x79aac56c, 0x3ce4e1ad, 0x51f0c802, 0x98f8f35a, 0x1626a49f, 0xeed82b29, 0x1d382fe3,
+0x0c4fb99a, 0xbb325778, 0x3ec6d97b, 0x6e77a6a9, 0xcb658b5c, 0xd45230c7, 0x2bd1408b, 0x60c03eb7,
+0xb9068d78, 0xa33754f4, 0xf430c87d, 0xc8a71302, 0xb96d8c32, 0xebd4e7be, 0xbe8b9d2d, 0x7979fb06,
+0xe7225308, 0x8b75cf77, 0x11ef8da4, 0xe083c858, 0x8d6b786f, 0x5a6317a6, 0xfa5cf7a0, 0x5dda0033,
+0xf28ebfb0, 0xf5b9c310, 0xa0eac280, 0x08b9767a, 0xa3d9d2b0, 0x79d34217, 0x021a718d, 0x9ac6336a,
+0x2711fd60, 0x438050e3, 0x069908a8, 0x3d7fedc4, 0x826d2bef, 0x4eeb8476, 0x488dcf25, 0x36c9d566,
+0x28e74e41, 0xc2610aca, 0x3d49a9cf, 0xbae3b9df, 0xb65f8de6, 0x92aeaf64, 0x3ac7d5e6, 0x9ea80509,
+0xf22b017d, 0xa4173f70, 0xdd1e16c3, 0x15e0d7f9, 0x50b1b887, 0x2b9f4fd5, 0x625aba82, 0x6a017962,
+0x2ec01b9c, 0x15488aa9, 0xd716e740, 0x40055a2c, 0x93d29a22, 0xe32dbf9a, 0x058745b9, 0x3453dc1e,
+0xd699296e, 0x496cff6f, 0x1c9f4986, 0xdfe2ed07, 0xb87242d1, 0x19de7eae, 0x053e561a, 0x15ad6f8c,
+0x66626c1c, 0x7154c24c, 0xea082b2a, 0x93eb2939, 0x17dcb0f0, 0x58d4f2ae, 0x9ea294fb, 0x52cf564c,
+0x9883fe66, 0x2ec40581, 0x763953c3, 0x01d6692e, 0xd3a0c108, 0xa1e7160e, 0xe4f2dfa6, 0x693ed285,
+0x74904698, 0x4c2b0edd, 0x4f757656, 0x5d393378, 0xa132234f, 0x3d321c5d, 0xc3f5e194, 0x4b269301,
+0xc79f022f, 0x3c997e7e, 0x5e4f9504, 0x3ffafbbd, 0x76f7ad0e, 0x296693f4, 0x3d1fce6f, 0xc61e45be,
+0xd3b5ab34, 0xf72bf9b7, 0x1b0434c0, 0x4e72b567, 0x5592a33d, 0xb5229301, 0xcfd2a87f, 0x60aeb767,
+0x1814386b, 0x30bcc33d, 0x38a0c07d, 0xfd1606f2, 0xc363519b, 0x589dd390, 0x5479f8e6, 0x1cb8d647,
+0x97fd61a9, 0xea7759f4, 0x2d57539d, 0x569a58cf, 0xe84e63ad, 0x462e1b78, 0x6580f87e, 0xf3817914,
+0x91da55f4, 0x40a230f3, 0xd1988f35, 0xb6e318d2, 0x3ffa50bc, 0x3d40f021, 0xc3c0bdae, 0x4958c24c,
+0x518f36b2, 0x84b1d370, 0x0fedce83, 0x878ddada, 0xf2a279c7, 0x94e01be8, 0x90716f4b, 0x954b8aa3
+};
+static const u32 s8[256] = {
+0xe216300d, 0xbbddfffc, 0xa7ebdabd, 0x35648095, 0x7789f8b7, 0xe6c1121b, 0x0e241600, 0x052ce8b5,
+0x11a9cfb0, 0xe5952f11, 0xece7990a, 0x9386d174, 0x2a42931c, 0x76e38111, 0xb12def3a, 0x37ddddfc,
+0xde9adeb1, 0x0a0cc32c, 0xbe197029, 0x84a00940, 0xbb243a0f, 0xb4d137cf, 0xb44e79f0, 0x049eedfd,
+0x0b15a15d, 0x480d3168, 0x8bbbde5a, 0x669ded42, 0xc7ece831, 0x3f8f95e7, 0x72df191b, 0x7580330d,
+0x94074251, 0x5c7dcdfa, 0xabbe6d63, 0xaa402164, 0xb301d40a, 0x02e7d1ca, 0x53571dae, 0x7a3182a2,
+0x12a8ddec, 0xfdaa335d, 0x176f43e8, 0x71fb46d4, 0x38129022, 0xce949ad4, 0xb84769ad, 0x965bd862,
+0x82f3d055, 0x66fb9767, 0x15b80b4e, 0x1d5b47a0, 0x4cfde06f, 0xc28ec4b8, 0x57e8726e, 0x647a78fc,
+0x99865d44, 0x608bd593, 0x6c200e03, 0x39dc5ff6, 0x5d0b00a3, 0xae63aff2, 0x7e8bd632, 0x70108c0c,
+0xbbd35049, 0x2998df04, 0x980cf42a, 0x9b6df491, 0x9e7edd53, 0x06918548, 0x58cb7e07, 0x3b74ef2e,
+0x522fffb1, 0xd24708cc, 0x1c7e27cd, 0xa4eb215b, 0x3cf1d2e2, 0x19b47a38, 0x424f7618, 0x35856039,
+0x9d17dee7, 0x27eb35e6, 0xc9aff67b, 0x36baf5b8, 0x09c467cd, 0xc18910b1, 0xe11dbf7b, 0x06cd1af8,
+0x7170c608, 0x2d5e3354, 0xd4de495a, 0x64c6d006, 0xbcc0c62c, 0x3dd00db3, 0x708f8f34, 0x77d51b42,
+0x264f620f, 0x24b8d2bf, 0x15c1b79e, 0x46a52564, 0xf8d7e54e, 0x3e378160, 0x7895cda5, 0x859c15a5,
+0xe6459788, 0xc37bc75f, 0xdb07ba0c, 0x0676a3ab, 0x7f229b1e, 0x31842e7b, 0x24259fd7, 0xf8bef472,
+0x835ffcb8, 0x6df4c1f2, 0x96f5b195, 0xfd0af0fc, 0xb0fe134c, 0xe2506d3d, 0x4f9b12ea, 0xf215f225,
+0xa223736f, 0x9fb4c428, 0x25d04979, 0x34c713f8, 0xc4618187, 0xea7a6e98, 0x7cd16efc, 0x1436876c,
+0xf1544107, 0xbedeee14, 0x56e9af27, 0xa04aa441, 0x3cf7c899, 0x92ecbae6, 0xdd67016d, 0x151682eb,
+0xa842eedf, 0xfdba60b4, 0xf1907b75, 0x20e3030f, 0x24d8c29e, 0xe139673b, 0xefa63fb8, 0x71873054,
+0xb6f2cf3b, 0x9f326442, 0xcb15a4cc, 0xb01a4504, 0xf1e47d8d, 0x844a1be5, 0xbae7dfdc, 0x42cbda70,
+0xcd7dae0a, 0x57e85b7a, 0xd53f5af6, 0x20cf4d8c, 0xcea4d428, 0x79d130a4, 0x3486ebfb, 0x33d3cddc,
+0x77853b53, 0x37effcb5, 0xc5068778, 0xe580b3e6, 0x4e68b8f4, 0xc5c8b37e, 0x0d809ea2, 0x398feb7c,
+0x132a4f94, 0x43b7950e, 0x2fee7d1c, 0x223613bd, 0xdd06caa2, 0x37df932b, 0xc4248289, 0xacf3ebc3,
+0x5715f6b7, 0xef3478dd, 0xf267616f, 0xc148cbe4, 0x9052815e, 0x5e410fab, 0xb48a2465, 0x2eda7fa4,
+0xe87b40e4, 0xe98ea084, 0x5889e9e1, 0xefd390fc, 0xdd07d35b, 0xdb485694, 0x38d7e5b2, 0x57720101,
+0x730edebc, 0x5b643113, 0x94917e4f, 0x503c2fba, 0x646f1282, 0x7523d24a, 0xe0779695, 0xf9c17a8f,
+0x7a5b2121, 0xd187b896, 0x29263a4d, 0xba510cdf, 0x81f47c9f, 0xad1163ed, 0xea7b5965, 0x1a00726e,
+0x11403092, 0x00da6d77, 0x4a0cdd61, 0xad1f4603, 0x605bdfb0, 0x9eedc364, 0x22ebe6a8, 0xcee7d28a,
+0xa0e736a0, 0x5564a6b9, 0x10853209, 0xc7eb8f37, 0x2de705ca, 0x8951570f, 0xdf09822b, 0xbd691a6c,
+0xaa12e4f2, 0x87451c0f, 0xe0f6a27a, 0x3ada4819, 0x4cf1764f, 0x0d771c2b, 0x67cdb156, 0x350d8384,
+0x5938fa0f, 0x42399ef3, 0x36997b07, 0x0e84093d, 0x4aa93e61, 0x8360d87b, 0x1fa98b0c, 0x1149382c,
+0xe97625a5, 0x0614d1b7, 0x0e25244b, 0x0c768347, 0x589e8d82, 0x0d2059d1, 0xa466bb1e, 0xf8da0a82,
+0x04f19130, 0xba6e4ec0, 0x99265164, 0x1ee7230d, 0x50b2ad80, 0xeaee6801, 0x8db2a283, 0xea8bf59e
+};
+
+
+#ifdef USE_AMD64_ASM
+
+/* Assembly implementations of CAST5. */
+extern void _gcry_cast5_amd64_encrypt_block(CAST5_context *c, byte *outbuf,
+ const byte *inbuf);
+
+extern void _gcry_cast5_amd64_decrypt_block(CAST5_context *c, byte *outbuf,
+ const byte *inbuf);
+
+/* These assembly implementations process four blocks in parallel. */
+extern void _gcry_cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out,
+ const byte *in, byte *ctr);
+
+extern void _gcry_cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out,
+ const byte *in, byte *iv);
+
+extern void _gcry_cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out,
+ const byte *in, byte *iv);
+
+static void
+do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
+{
+ _gcry_cast5_amd64_encrypt_block (context, outbuf, inbuf);
+}
+
+static void
+do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
+{
+ _gcry_cast5_amd64_decrypt_block (context, outbuf, inbuf);
+}
+
+static void
+cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out, const byte *in, byte *ctr)
+{
+ _gcry_cast5_amd64_ctr_enc (ctx, out, in, ctr);
+}
+
+static void
+cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
+{
+ _gcry_cast5_amd64_cbc_dec (ctx, out, in, iv);
+}
+
+static void
+cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
+{
+ _gcry_cast5_amd64_cfb_dec (ctx, out, in, iv);
+}
+
+static unsigned int
+encrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+ CAST5_context *c = (CAST5_context *) context;
+ do_encrypt_block (c, outbuf, inbuf);
+ return /*burn_stack*/ (2*8);
+}
+
+static unsigned int
+decrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+ CAST5_context *c = (CAST5_context *) context;
+ do_decrypt_block (c, outbuf, inbuf);
+ return /*burn_stack*/ (2*8);
+}
+
+#elif defined(USE_ARM_ASM)
+
+/* ARM assembly implementations of CAST5. */
+extern void _gcry_cast5_arm_encrypt_block(CAST5_context *c, byte *outbuf,
+ const byte *inbuf);
+
+extern void _gcry_cast5_arm_decrypt_block(CAST5_context *c, byte *outbuf,
+ const byte *inbuf);
+
+/* These assembly implementations process two blocks in parallel. */
+extern void _gcry_cast5_arm_ctr_enc(CAST5_context *ctx, byte *out,
+ const byte *in, byte *ctr);
+
+extern void _gcry_cast5_arm_cbc_dec(CAST5_context *ctx, byte *out,
+ const byte *in, byte *iv);
+
+extern void _gcry_cast5_arm_cfb_dec(CAST5_context *ctx, byte *out,
+ const byte *in, byte *iv);
+
+static void
+do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
+{
+ _gcry_cast5_arm_encrypt_block (context, outbuf, inbuf);
+}
+
+static void
+do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
+{
+ _gcry_cast5_arm_decrypt_block (context, outbuf, inbuf);
+}
+
+static unsigned int
+encrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+ CAST5_context *c = (CAST5_context *) context;
+ do_encrypt_block (c, outbuf, inbuf);
+ return /*burn_stack*/ (10*4);
+}
+
+static unsigned int
+decrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+ CAST5_context *c = (CAST5_context *) context;
+ do_decrypt_block (c, outbuf, inbuf);
+ return /*burn_stack*/ (10*4);
+}
+
+#else /*USE_ARM_ASM*/
+
+#define F1(D,m,r) ( (I = ((m) + (D))), (I=rol(I,(r))), \
+ (((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]) )
+#define F2(D,m,r) ( (I = ((m) ^ (D))), (I=rol(I,(r))), \
+ (((s1[I >> 24] - s2[(I>>16)&0xff]) + s3[(I>>8)&0xff]) ^ s4[I&0xff]) )
+#define F3(D,m,r) ( (I = ((m) - (D))), (I=rol(I,(r))), \
+ (((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff]) )
+
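For readers following RFC 2144, Section 2.2: F1 is the Type 1 round function, and F2/F3 are Types 2 and 3, which differ only in how +, ^ and - are distributed over the initial key/data combination and the four S-box lookups. As a sketch (the helper name cast5_f1 is hypothetical; it relies on the s1..s4 macros defined above and rol from bithelp.h), Type 1 expands to:

/* Scalar form of the Type 1 round function: D is one 32-bit data half,
 * Km the masking subkey and Kr the 5-bit rotation subkey of the round. */
static u32
cast5_f1 (u32 D, u32 Km, byte Kr)
{
  u32 I = rol (Km + D, Kr);

  return ((s1[I >> 24] ^ s2[(I >> 16) & 0xff])
          - s3[(I >> 8) & 0xff]) + s4[I & 0xff];
}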
+static void
+do_encrypt_block( CAST5_context *c, byte *outbuf, const byte *inbuf )
+{
+ u32 l, r, t;
+ u32 I; /* used by the Fx macros */
+ u32 *Km;
+ u32 Kr;
+
+ Km = c->Km;
+ Kr = buf_get_le32(c->Kr + 0);
+
+ /* (L0,R0) <-- (m1...m64). (Split the plaintext into left and
+ * right 32-bit halves L0 = m1...m32 and R0 = m33...m64.)
+ */
+ l = buf_get_be32(inbuf + 0);
+ r = buf_get_be32(inbuf + 4);
+
+ /* (16 rounds) for i from 1 to 16, compute Li and Ri as follows:
+ * Li = Ri-1;
+ * Ri = Li-1 ^ f(Ri-1,Kmi,Kri), where f is defined in Section 2.2
+ * Rounds 1, 4, 7, 10, 13, and 16 use f function Type 1.
+ * Rounds 2, 5, 8, 11, and 14 use f function Type 2.
+ * Rounds 3, 6, 9, 12, and 15 use f function Type 3.
+ */
+
+ t = l; l = r; r = t ^ F1(r, Km[ 0], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F2(r, Km[ 1], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F3(r, Km[ 2], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F1(r, Km[ 3], Kr & 31); Kr = buf_get_le32(c->Kr + 4);
+ t = l; l = r; r = t ^ F2(r, Km[ 4], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F3(r, Km[ 5], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F1(r, Km[ 6], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F2(r, Km[ 7], Kr & 31); Kr = buf_get_le32(c->Kr + 8);
+ t = l; l = r; r = t ^ F3(r, Km[ 8], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F1(r, Km[ 9], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F2(r, Km[10], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F3(r, Km[11], Kr & 31); Kr = buf_get_le32(c->Kr + 12);
+ t = l; l = r; r = t ^ F1(r, Km[12], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F2(r, Km[13], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F3(r, Km[14], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F1(r, Km[15], Kr & 31);
+
+ /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and
+ * concatenate to form the ciphertext.) */
+ buf_put_be32(outbuf + 0, r);
+ buf_put_be32(outbuf + 4, l);
+}
+
+static unsigned int
+encrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+ CAST5_context *c = (CAST5_context *) context;
+ do_encrypt_block (c, outbuf, inbuf);
+ return /*burn_stack*/ (20+4*sizeof(void*));
+}
+
+
+static void
+do_encrypt_block_3( CAST5_context *c, byte *outbuf, const byte *inbuf )
+{
+ u32 l0, r0, t0, l1, r1, t1, l2, r2, t2;
+ u32 I; /* used by the Fx macros */
+ u32 *Km;
+ u32 Kr;
+
+ Km = c->Km;
+ Kr = buf_get_le32(c->Kr + 0);
+
+ l0 = buf_get_be32(inbuf + 0);
+ r0 = buf_get_be32(inbuf + 4);
+ l1 = buf_get_be32(inbuf + 8);
+ r1 = buf_get_be32(inbuf + 12);
+ l2 = buf_get_be32(inbuf + 16);
+ r2 = buf_get_be32(inbuf + 20);
+
+ t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 0], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 0], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 0], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 1], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 1], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 1], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 2], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 2], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 2], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 3], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 3], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 3], Kr & 31);
+ Kr = buf_get_le32(c->Kr + 4);
+ t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 4], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 4], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 4], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 5], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 5], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 5], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 6], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 6], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 6], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 7], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 7], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 7], Kr & 31);
+ Kr = buf_get_le32(c->Kr + 8);
+ t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 8], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 8], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 8], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 9], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 9], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 9], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[10], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[10], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[10], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[11], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[11], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[11], Kr & 31);
+ Kr = buf_get_le32(c->Kr + 12);
+ t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[12], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[12], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[12], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[13], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[13], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[13], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[14], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[14], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[14], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[15], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[15], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[15], Kr & 31);
+
+ buf_put_be32(outbuf + 0, r0);
+ buf_put_be32(outbuf + 4, l0);
+ buf_put_be32(outbuf + 8, r1);
+ buf_put_be32(outbuf + 12, l1);
+ buf_put_be32(outbuf + 16, r2);
+ buf_put_be32(outbuf + 20, l2);
+}
+
+
+static void
+do_decrypt_block (CAST5_context *c, byte *outbuf, const byte *inbuf )
+{
+ u32 l, r, t;
+ u32 I;
+ u32 *Km;
+ u32 Kr;
+
+ Km = c->Km;
+ Kr = buf_get_be32(c->Kr + 12);
+
+ l = buf_get_be32(inbuf + 0);
+ r = buf_get_be32(inbuf + 4);
+
+ t = l; l = r; r = t ^ F1(r, Km[15], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F3(r, Km[14], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F2(r, Km[13], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F1(r, Km[12], Kr & 31); Kr = buf_get_be32(c->Kr + 8);
+ t = l; l = r; r = t ^ F3(r, Km[11], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F2(r, Km[10], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F1(r, Km[ 9], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F3(r, Km[ 8], Kr & 31); Kr = buf_get_be32(c->Kr + 4);
+ t = l; l = r; r = t ^ F2(r, Km[ 7], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F1(r, Km[ 6], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F3(r, Km[ 5], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F2(r, Km[ 4], Kr & 31); Kr = buf_get_be32(c->Kr + 0);
+ t = l; l = r; r = t ^ F1(r, Km[ 3], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F3(r, Km[ 2], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F2(r, Km[ 1], Kr & 31); Kr >>= 8;
+ t = l; l = r; r = t ^ F1(r, Km[ 0], Kr & 31);
+
+ buf_put_be32(outbuf + 0, r);
+ buf_put_be32(outbuf + 4, l);
+}
+
+static unsigned int
+decrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+ CAST5_context *c = (CAST5_context *) context;
+ do_decrypt_block (c, outbuf, inbuf);
+ return /*burn_stack*/ (20+4*sizeof(void*));
+}
+
+
+static void
+do_decrypt_block_3 (CAST5_context *c, byte *outbuf, const byte *inbuf )
+{
+ u32 l0, r0, t0, l1, r1, t1, l2, r2, t2;
+ u32 I;
+ u32 *Km;
+ u32 Kr;
+
+ Km = c->Km;
+ Kr = buf_get_be32(c->Kr + 12);
+
+ l0 = buf_get_be32(inbuf + 0);
+ r0 = buf_get_be32(inbuf + 4);
+ l1 = buf_get_be32(inbuf + 8);
+ r1 = buf_get_be32(inbuf + 12);
+ l2 = buf_get_be32(inbuf + 16);
+ r2 = buf_get_be32(inbuf + 20);
+
+ t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[15], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[15], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[15], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[14], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[14], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[14], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[13], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[13], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[13], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[12], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[12], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[12], Kr & 31);
+ Kr = buf_get_be32(c->Kr + 8);
+ t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[11], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[11], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[11], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[10], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[10], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[10], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 9], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 9], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 9], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 8], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 8], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 8], Kr & 31);
+ Kr = buf_get_be32(c->Kr + 4);
+ t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 7], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 7], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 7], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 6], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 6], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 6], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 5], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 5], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 5], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 4], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 4], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 4], Kr & 31);
+ Kr = buf_get_be32(c->Kr + 0);
+ t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 3], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 3], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 3], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 2], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 2], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 2], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 1], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 1], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 1], Kr & 31);
+ Kr >>= 8;
+ t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 0], Kr & 31);
+ t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 0], Kr & 31);
+ t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 0], Kr & 31);
+
+ buf_put_be32(outbuf + 0, r0);
+ buf_put_be32(outbuf + 4, l0);
+ buf_put_be32(outbuf + 8, r1);
+ buf_put_be32(outbuf + 12, l1);
+ buf_put_be32(outbuf + 16, r2);
+ buf_put_be32(outbuf + 20, l2);
+}
+
+#endif /*!USE_ARM_ASM*/
+
+
+/* Bulk encryption of complete blocks in CTR mode. This function is only
+ intended for the bulk encryption feature of cipher.c. CTR is expected to be
+ of size CAST5_BLOCKSIZE. */
+static void
+_gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ CAST5_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char tmpbuf[CAST5_BLOCKSIZE * 3];
+ int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+ {
+ if (nblocks >= 4)
+ burn_stack_depth += 8 * sizeof(void*);
+
+ /* Process data in 4 block chunks. */
+ while (nblocks >= 4)
+ {
+ cast5_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 4;
+ outbuf += 4 * CAST5_BLOCKSIZE;
+ inbuf += 4 * CAST5_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#elif defined(USE_ARM_ASM)
+ {
+ /* Process data in 2 block chunks. */
+ while (nblocks >= 2)
+ {
+ _gcry_cast5_arm_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 2;
+ outbuf += 2 * CAST5_BLOCKSIZE;
+ inbuf += 2 * CAST5_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
+ for ( ;nblocks >= 3; nblocks -= 3)
+ {
+ /* Prepare the counter blocks. */
+ cipher_block_cpy (tmpbuf + 0, ctr, CAST5_BLOCKSIZE);
+ cipher_block_cpy (tmpbuf + 8, ctr, CAST5_BLOCKSIZE);
+ cipher_block_cpy (tmpbuf + 16, ctr, CAST5_BLOCKSIZE);
+ cipher_block_add (tmpbuf + 8, 1, CAST5_BLOCKSIZE);
+ cipher_block_add (tmpbuf + 16, 2, CAST5_BLOCKSIZE);
+ cipher_block_add (ctr, 3, CAST5_BLOCKSIZE);
+ /* Encrypt the counter. */
+ do_encrypt_block_3(ctx, tmpbuf, tmpbuf);
+ /* XOR the input with the encrypted counter and store in output. */
+ buf_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE * 3);
+ outbuf += CAST5_BLOCKSIZE * 3;
+ inbuf += CAST5_BLOCKSIZE * 3;
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* Encrypt the counter. */
+ do_encrypt_block(ctx, tmpbuf, ctr);
+ /* XOR the input with the encrypted counter and store in output. */
+ cipher_block_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE);
+ outbuf += CAST5_BLOCKSIZE;
+ inbuf += CAST5_BLOCKSIZE;
+ /* Increment the counter. */
+ cipher_block_add (ctr, 1, CAST5_BLOCKSIZE);
+ }
+
+ wipememory(tmpbuf, sizeof(tmpbuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CBC mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ CAST5_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char savebuf[CAST5_BLOCKSIZE * 3];
+ int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+ {
+ if (nblocks >= 4)
+ burn_stack_depth += 8 * sizeof(void*);
+
+ /* Process data in 4 block chunks. */
+ while (nblocks >= 4)
+ {
+ cast5_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 4;
+ outbuf += 4 * CAST5_BLOCKSIZE;
+ inbuf += 4 * CAST5_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#elif defined(USE_ARM_ASM)
+ {
+ /* Process data in 2 block chunks. */
+ while (nblocks >= 2)
+ {
+ _gcry_cast5_arm_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 2;
+ outbuf += 2 * CAST5_BLOCKSIZE;
+ inbuf += 2 * CAST5_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
+ for ( ;nblocks >= 3; nblocks -= 3)
+ {
+ /* INBUF is needed later and it may be identical to OUTBUF, so store
+ the intermediate result to SAVEBUF. */
+ do_decrypt_block_3 (ctx, savebuf, inbuf);
+
+ cipher_block_xor_1 (savebuf + 0, iv, CAST5_BLOCKSIZE);
+ cipher_block_xor_1 (savebuf + 8, inbuf, CAST5_BLOCKSIZE * 2);
+ cipher_block_cpy (iv, inbuf + 16, CAST5_BLOCKSIZE);
+ buf_cpy (outbuf, savebuf, CAST5_BLOCKSIZE * 3);
+ inbuf += CAST5_BLOCKSIZE * 3;
+ outbuf += CAST5_BLOCKSIZE * 3;
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* INBUF is needed later and it may be identical to OUTBUF, so store
+ the intermediate result to SAVEBUF. */
+ do_decrypt_block (ctx, savebuf, inbuf);
+
+ cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAST5_BLOCKSIZE);
+ inbuf += CAST5_BLOCKSIZE;
+ outbuf += CAST5_BLOCKSIZE;
+ }
+
+ wipememory(savebuf, sizeof(savebuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CFB mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ CAST5_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char tmpbuf[CAST5_BLOCKSIZE * 3];
+ int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+ {
+ if (nblocks >= 4)
+ burn_stack_depth += 8 * sizeof(void*);
+
+ /* Process data in 4 block chunks. */
+ while (nblocks >= 4)
+ {
+ cast5_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 4;
+ outbuf += 4 * CAST5_BLOCKSIZE;
+ inbuf += 4 * CAST5_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#elif defined(USE_ARM_ASM)
+ {
+ /* Process data in 2 block chunks. */
+ while (nblocks >= 2)
+ {
+ _gcry_cast5_arm_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 2;
+ outbuf += 2 * CAST5_BLOCKSIZE;
+ inbuf += 2 * CAST5_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
+ for ( ;nblocks >= 3; nblocks -= 3 )
+ {
+ cipher_block_cpy (tmpbuf + 0, iv, CAST5_BLOCKSIZE);
+ cipher_block_cpy (tmpbuf + 8, inbuf + 0, CAST5_BLOCKSIZE * 2);
+ cipher_block_cpy (iv, inbuf + 16, CAST5_BLOCKSIZE);
+ do_encrypt_block_3 (ctx, tmpbuf, tmpbuf);
+ buf_xor (outbuf, inbuf, tmpbuf, CAST5_BLOCKSIZE * 3);
+ outbuf += CAST5_BLOCKSIZE * 3;
+ inbuf += CAST5_BLOCKSIZE * 3;
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ do_encrypt_block(ctx, iv, iv);
+ cipher_block_xor_n_copy(outbuf, iv, inbuf, CAST5_BLOCKSIZE);
+ outbuf += CAST5_BLOCKSIZE;
+ inbuf += CAST5_BLOCKSIZE;
+ }
+
+ wipememory(tmpbuf, sizeof(tmpbuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Run the self-tests for CAST5-CTR, tests IV increment of bulk CTR
+ encryption. Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+ const int nblocks = 4+1;
+ const int blocksize = CAST5_BLOCKSIZE;
+ const int context_size = sizeof(CAST5_context);
+
+ return _gcry_selftest_helper_ctr("CAST5", &cast_setkey,
+ &encrypt_block, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for CAST5-CBC, tests bulk CBC decryption.
+ Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+ const int nblocks = 4+2;
+ const int blocksize = CAST5_BLOCKSIZE;
+ const int context_size = sizeof(CAST5_context);
+
+ return _gcry_selftest_helper_cbc("CAST5", &cast_setkey,
+ &encrypt_block, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for CAST5-CFB, tests bulk CFB decryption.
+ Returns NULL on success. */
+static const char *
+selftest_cfb (void)
+{
+ const int nblocks = 4+2;
+ const int blocksize = CAST5_BLOCKSIZE;
+ const int context_size = sizeof(CAST5_context);
+
+ return _gcry_selftest_helper_cfb("CAST5", &cast_setkey,
+ &encrypt_block, nblocks, blocksize, context_size);
+}
+
+
+static const char*
+selftest(void)
+{
+ CAST5_context c;
+ cipher_bulk_ops_t bulk_ops;
+ static const byte key[16] =
+ { 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78,
+ 0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9A };
+ static const byte plain[8] =
+ { 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF };
+ static const byte cipher[8] =
+ { 0x23, 0x8B, 0x4F, 0xE5, 0x84, 0x7E, 0x44, 0xB2 };
+ byte buffer[8];
+ const char *r;
+
+ cast_setkey( &c, key, 16, &bulk_ops );
+ encrypt_block( &c, buffer, plain );
+ if( memcmp( buffer, cipher, 8 ) )
+ return "1";
+ decrypt_block( &c, buffer, buffer );
+ if( memcmp( buffer, plain, 8 ) )
+ return "2";
+
+#if 0 /* full maintenance test */
+ {
+ int i;
+ byte a0[16] = { 0x01,0x23,0x45,0x67,0x12,0x34,0x56,0x78,
+ 0x23,0x45,0x67,0x89,0x34,0x56,0x78,0x9A };
+ byte b0[16] = { 0x01,0x23,0x45,0x67,0x12,0x34,0x56,0x78,
+ 0x23,0x45,0x67,0x89,0x34,0x56,0x78,0x9A };
+ byte a1[16] = { 0xEE,0xA9,0xD0,0xA2,0x49,0xFD,0x3B,0xA6,
+ 0xB3,0x43,0x6F,0xB8,0x9D,0x6D,0xCA,0x92 };
+ byte b1[16] = { 0xB2,0xC9,0x5E,0xB0,0x0C,0x31,0xAD,0x71,
+ 0x80,0xAC,0x05,0xB8,0xE8,0x3D,0x69,0x6E };
+
+ for(i=0; i < 1000000; i++ ) {
+ cast_setkey( &c, b0, 16, &bulk_ops );
+ encrypt_block( &c, a0, a0 );
+ encrypt_block( &c, a0+8, a0+8 );
+ cast_setkey( &c, a0, 16, &bulk_ops );
+ encrypt_block( &c, b0, b0 );
+ encrypt_block( &c, b0+8, b0+8 );
+ }
+ if( memcmp( a0, a1, 16 ) || memcmp( b0, b1, 16 ) )
+ return "3";
+
+ }
+#endif
+
+ if ( (r = selftest_cbc ()) )
+ return r;
+
+ if ( (r = selftest_cfb ()) )
+ return r;
+
+ if ( (r = selftest_ctr ()) )
+ return r;
+
+ return NULL;
+}
+
+
+static void
+key_schedule( u32 *x, u32 *z, u32 *k )
+{
+
+#define xi(i) ((x[(i)/4] >> (8*(3-((i)%4)))) & 0xff)
+#define zi(i) ((z[(i)/4] >> (8*(3-((i)%4)))) & 0xff)
+
+ z[0] = x[0] ^ s5[xi(13)]^s6[xi(15)]^s7[xi(12)]^s8[xi(14)]^s7[xi( 8)];
+ z[1] = x[2] ^ s5[zi( 0)]^s6[zi( 2)]^s7[zi( 1)]^s8[zi( 3)]^s8[xi(10)];
+ z[2] = x[3] ^ s5[zi( 7)]^s6[zi( 6)]^s7[zi( 5)]^s8[zi( 4)]^s5[xi( 9)];
+ z[3] = x[1] ^ s5[zi(10)]^s6[zi( 9)]^s7[zi(11)]^s8[zi( 8)]^s6[xi(11)];
+ k[0] = s5[zi( 8)]^s6[zi( 9)]^s7[zi( 7)]^s8[zi( 6)]^s5[zi( 2)];
+ k[1] = s5[zi(10)]^s6[zi(11)]^s7[zi( 5)]^s8[zi( 4)]^s6[zi( 6)];
+ k[2] = s5[zi(12)]^s6[zi(13)]^s7[zi( 3)]^s8[zi( 2)]^s7[zi( 9)];
+ k[3] = s5[zi(14)]^s6[zi(15)]^s7[zi( 1)]^s8[zi( 0)]^s8[zi(12)];
+
+ x[0] = z[2] ^ s5[zi( 5)]^s6[zi( 7)]^s7[zi( 4)]^s8[zi( 6)]^s7[zi( 0)];
+ x[1] = z[0] ^ s5[xi( 0)]^s6[xi( 2)]^s7[xi( 1)]^s8[xi( 3)]^s8[zi( 2)];
+ x[2] = z[1] ^ s5[xi( 7)]^s6[xi( 6)]^s7[xi( 5)]^s8[xi( 4)]^s5[zi( 1)];
+ x[3] = z[3] ^ s5[xi(10)]^s6[xi( 9)]^s7[xi(11)]^s8[xi( 8)]^s6[zi( 3)];
+ k[4] = s5[xi( 3)]^s6[xi( 2)]^s7[xi(12)]^s8[xi(13)]^s5[xi( 8)];
+ k[5] = s5[xi( 1)]^s6[xi( 0)]^s7[xi(14)]^s8[xi(15)]^s6[xi(13)];
+ k[6] = s5[xi( 7)]^s6[xi( 6)]^s7[xi( 8)]^s8[xi( 9)]^s7[xi( 3)];
+ k[7] = s5[xi( 5)]^s6[xi( 4)]^s7[xi(10)]^s8[xi(11)]^s8[xi( 7)];
+
+ z[0] = x[0] ^ s5[xi(13)]^s6[xi(15)]^s7[xi(12)]^s8[xi(14)]^s7[xi( 8)];
+ z[1] = x[2] ^ s5[zi( 0)]^s6[zi( 2)]^s7[zi( 1)]^s8[zi( 3)]^s8[xi(10)];
+ z[2] = x[3] ^ s5[zi( 7)]^s6[zi( 6)]^s7[zi( 5)]^s8[zi( 4)]^s5[xi( 9)];
+ z[3] = x[1] ^ s5[zi(10)]^s6[zi( 9)]^s7[zi(11)]^s8[zi( 8)]^s6[xi(11)];
+ k[8] = s5[zi( 3)]^s6[zi( 2)]^s7[zi(12)]^s8[zi(13)]^s5[zi( 9)];
+ k[9] = s5[zi( 1)]^s6[zi( 0)]^s7[zi(14)]^s8[zi(15)]^s6[zi(12)];
+ k[10]= s5[zi( 7)]^s6[zi( 6)]^s7[zi( 8)]^s8[zi( 9)]^s7[zi( 2)];
+ k[11]= s5[zi( 5)]^s6[zi( 4)]^s7[zi(10)]^s8[zi(11)]^s8[zi( 6)];
+
+ x[0] = z[2] ^ s5[zi( 5)]^s6[zi( 7)]^s7[zi( 4)]^s8[zi( 6)]^s7[zi( 0)];
+ x[1] = z[0] ^ s5[xi( 0)]^s6[xi( 2)]^s7[xi( 1)]^s8[xi( 3)]^s8[zi( 2)];
+ x[2] = z[1] ^ s5[xi( 7)]^s6[xi( 6)]^s7[xi( 5)]^s8[xi( 4)]^s5[zi( 1)];
+ x[3] = z[3] ^ s5[xi(10)]^s6[xi( 9)]^s7[xi(11)]^s8[xi( 8)]^s6[zi( 3)];
+ k[12]= s5[xi( 8)]^s6[xi( 9)]^s7[xi( 7)]^s8[xi( 6)]^s5[xi( 3)];
+ k[13]= s5[xi(10)]^s6[xi(11)]^s7[xi( 5)]^s8[xi( 4)]^s6[xi( 7)];
+ k[14]= s5[xi(12)]^s6[xi(13)]^s7[xi( 3)]^s8[xi( 2)]^s7[xi( 8)];
+ k[15]= s5[xi(14)]^s6[xi(15)]^s7[xi( 1)]^s8[xi( 0)]^s8[xi(13)];
+
+#undef xi
+#undef zi
+}
+
+
+static gcry_err_code_t
+do_cast_setkey( CAST5_context *c, const byte *key, unsigned keylen )
+{
+ static int initialized;
+ static const char* selftest_failed;
+ int i;
+ u32 x[4];
+ u32 z[4];
+ u32 k[16];
+
+ if( !initialized )
+ {
+ initialized = 1;
+ selftest_failed = selftest();
+ if( selftest_failed )
+ log_error ("CAST5 selftest failed (%s).\n", selftest_failed );
+ }
+ if( selftest_failed )
+ return GPG_ERR_SELFTEST_FAILED;
+
+ if( keylen != 16 )
+ return GPG_ERR_INV_KEYLEN;
+
+ x[0] = buf_get_be32(key + 0);
+ x[1] = buf_get_be32(key + 4);
+ x[2] = buf_get_be32(key + 8);
+ x[3] = buf_get_be32(key + 12);
+
+ key_schedule( x, z, k );
+ for(i=0; i < 16; i++ )
+ c->Km[i] = k[i];
+ key_schedule( x, z, k );
+ for(i=0; i < 16; i++ )
+ c->Kr[i] = k[i] & 0x1f;
+
+#ifdef USE_ARM_ASM
+ for (i = 0; i < 4; i++)
+ {
+ byte Kr_arm[4];
+
+ /* Convert rotate left to rotate right and add shift left
+ * by 2. */
+ Kr_arm[0] = ((32 - c->Kr[4 * i + 0]) - 2) & 0x1f;
+ Kr_arm[1] = ((32 - c->Kr[4 * i + 1]) - 2) & 0x1f;
+ Kr_arm[2] = ((32 - c->Kr[4 * i + 2]) - 2) & 0x1f;
+ Kr_arm[3] = ((32 - c->Kr[4 * i + 3]) - 2) & 0x1f;
+
+ /* Endian friendly store. */
+ c->Kr_arm_enc[i] = Kr_arm[0] |
+ (Kr_arm[1] << 8) |
+ (Kr_arm[2] << 16) |
+ (Kr_arm[3] << 24);
+ c->Kr_arm_dec[i] = Kr_arm[3] |
+ (Kr_arm[2] << 8) |
+ (Kr_arm[1] << 16) |
+ (Kr_arm[0] << 24);
+
+ wipememory(Kr_arm, sizeof(Kr_arm));
+ }
+#endif
+
+ wipememory(x, sizeof x);
+ wipememory(z, sizeof z);
+ wipememory(k, sizeof k);
+
+#undef xi
+#undef zi
+ return GPG_ERR_NO_ERROR;
+}
+
+static gcry_err_code_t
+cast_setkey (void *context, const byte *key, unsigned keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ CAST5_context *c = (CAST5_context *) context;
+ gcry_err_code_t rc = do_cast_setkey (c, key, keylen);
+
+ /* Setup bulk encryption routines. */
+ memset (bulk_ops, 0, sizeof(*bulk_ops));
+ bulk_ops->cfb_dec = _gcry_cast5_cfb_dec;
+ bulk_ops->cbc_dec = _gcry_cast5_cbc_dec;
+ bulk_ops->ctr_enc = _gcry_cast5_ctr_enc;
+
+ return rc;
+}
+
+
+gcry_cipher_spec_t _gcry_cipher_spec_cast5 =
+ {
+ GCRY_CIPHER_CAST5, {0, 0},
+ "CAST5", NULL, NULL, CAST5_BLOCKSIZE, 128, sizeof (CAST5_context),
+ cast_setkey, encrypt_block, decrypt_block
+ };
diff --git a/comm/third_party/libgcrypt/cipher/chacha20-aarch64.S b/comm/third_party/libgcrypt/cipher/chacha20-aarch64.S
new file mode 100644
index 0000000000..b8f9724a37
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/chacha20-aarch64.S
@@ -0,0 +1,648 @@
+/* chacha20-aarch64.S - ARMv8/AArch64 accelerated chacha20 blocks function
+ *
+ * Copyright (C) 2017-2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Based on D. J. Bernstein reference implementation at
+ * http://cr.yp.to/chacha.html:
+ *
+ * chacha-regs.c version 20080118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__) && \
+ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) && \
+ defined(USE_CHACHA20)
+
+.cpu generic+simd
+
+.text
+
+#include "asm-poly1305-aarch64.h"
+
+/* register macros */
+#define INPUT x0
+#define DST x1
+#define SRC x2
+#define NBLKS x3
+#define ROUND x4
+#define INPUT_CTR x5
+#define INPUT_POS x6
+#define CTR x7
+
+/* vector registers */
+#define X0 v16
+#define X1 v17
+#define X2 v18
+#define X3 v19
+#define X4 v20
+#define X5 v21
+#define X6 v22
+#define X7 v23
+#define X8 v24
+#define X9 v25
+#define X10 v26
+#define X11 v27
+#define X12 v28
+#define X13 v29
+#define X14 v30
+#define X15 v31
+
+#define VCTR v0
+#define VTMP0 v1
+#define VTMP1 v2
+#define VTMP2 v3
+#define VTMP3 v4
+#define X12_TMP v5
+#define X13_TMP v6
+#define ROT8 v7
+
+/**********************************************************************
+ helper macros
+ **********************************************************************/
+
+#define _(...) __VA_ARGS__
+
+#define vpunpckldq(s1, s2, dst) \
+ zip1 dst.4s, s2.4s, s1.4s;
+
+#define vpunpckhdq(s1, s2, dst) \
+ zip2 dst.4s, s2.4s, s1.4s;
+
+#define vpunpcklqdq(s1, s2, dst) \
+ zip1 dst.2d, s2.2d, s1.2d;
+
+#define vpunpckhqdq(s1, s2, dst) \
+ zip2 dst.2d, s2.2d, s1.2d;
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
+ vpunpckhdq(x1, x0, t2); \
+ vpunpckldq(x1, x0, x0); \
+ \
+ vpunpckldq(x3, x2, t1); \
+ vpunpckhdq(x3, x2, x2); \
+ \
+ vpunpckhqdq(t1, x0, x1); \
+ vpunpcklqdq(t1, x0, x0); \
+ \
+ vpunpckhqdq(x2, t2, x3); \
+ vpunpcklqdq(x2, t2, x2);
+
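The vpunpck* wrappers above map the x86-style unpack naming onto AArch64 zip1/zip2, and transpose_4x4 amounts to a plain 4x4 transpose of 32-bit lanes, which the 4-way code uses to regroup lane-sliced state words (word i of four blocks held in one vector) back into per-block order. A scalar sketch of the same operation, shown only for clarity:

#include <stdint.h>

/* 4x4 transpose of 32-bit words; m[i][j] corresponds to lane j of
 * vector xi in transpose_4x4. */
static void
transpose_4x4_scalar (uint32_t m[4][4])
{
  unsigned int i, j;

  for (i = 0; i < 4; i++)
    for (j = i + 1; j < 4; j++)
      {
        uint32_t t = m[i][j];
        m[i][j] = m[j][i];
        m[j][i] = t;
      }
}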
+#define clear(x) \
+ eor x.16b, x.16b, x.16b;
+
+/**********************************************************************
+ 4-way chacha20
+ **********************************************************************/
+
+#define XOR(d,s1,s2) \
+ eor d.16b, s2.16b, s1.16b;
+
+#define PLUS(ds,s) \
+ add ds.4s, ds.4s, s.4s;
+
+#define ROTATE4(dst1,dst2,dst3,dst4,c,src1,src2,src3,src4,iop1,iop2,iop3) \
+ shl dst1.4s, src1.4s, #(c); \
+ shl dst2.4s, src2.4s, #(c); \
+ iop1; \
+ shl dst3.4s, src3.4s, #(c); \
+ shl dst4.4s, src4.4s, #(c); \
+ iop2; \
+ sri dst1.4s, src1.4s, #(32 - (c)); \
+ sri dst2.4s, src2.4s, #(32 - (c)); \
+ iop3; \
+ sri dst3.4s, src3.4s, #(32 - (c)); \
+ sri dst4.4s, src4.4s, #(32 - (c));
+
+#define ROTATE4_8(dst1,dst2,dst3,dst4,src1,src2,src3,src4,iop1,iop2,iop3) \
+ tbl dst1.16b, {src1.16b}, ROT8.16b; \
+ iop1; \
+ tbl dst2.16b, {src2.16b}, ROT8.16b; \
+ iop2; \
+ tbl dst3.16b, {src3.16b}, ROT8.16b; \
+ iop3; \
+ tbl dst4.16b, {src4.16b}, ROT8.16b;
+
+#define ROTATE4_16(dst1,dst2,dst3,dst4,src1,src2,src3,src4,iop1) \
+ rev32 dst1.8h, src1.8h; \
+ rev32 dst2.8h, src2.8h; \
+ iop1; \
+ rev32 dst3.8h, src3.8h; \
+ rev32 dst4.8h, src4.8h;
+
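+/* Rotations pick the cheapest NEON sequence per amount: shl+sri pairs for
+ * the 12- and 7-bit rotates, rev32 on 16-bit halves for the 16-bit rotate,
+ * and a tbl byte shuffle through ROT8 for the 8-bit rotate.  The iop
+ * arguments let callers slot independent instructions between the vector
+ * operations. */
+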
+#define QUARTERROUND4(a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,a4,b4,c4,d4,ign,tmp1,tmp2,tmp3,tmp4,\
+ iop1,iop2,iop3,iop4,iop5,iop6,iop7,iop8,iop9,iop10,iop11,iop12,iop13,iop14,\
+ iop15,iop16,iop17,iop18,iop19,iop20,iop21,iop22,iop23,iop24,iop25,iop26,\
+ iop27,iop28,iop29) \
+ PLUS(a1,b1); PLUS(a2,b2); iop1; \
+ PLUS(a3,b3); PLUS(a4,b4); iop2; \
+ XOR(tmp1,d1,a1); XOR(tmp2,d2,a2); iop3; \
+ XOR(tmp3,d3,a3); XOR(tmp4,d4,a4); iop4; \
+ ROTATE4_16(d1, d2, d3, d4, tmp1, tmp2, tmp3, tmp4, _(iop5)); \
+ iop6; \
+ PLUS(c1,d1); PLUS(c2,d2); iop7; \
+ PLUS(c3,d3); PLUS(c4,d4); iop8; \
+ XOR(tmp1,b1,c1); XOR(tmp2,b2,c2); iop9; \
+ XOR(tmp3,b3,c3); XOR(tmp4,b4,c4); iop10; \
+ ROTATE4(b1, b2, b3, b4, 12, tmp1, tmp2, tmp3, tmp4, \
+ _(iop11), _(iop12), _(iop13)); iop14; \
+ PLUS(a1,b1); PLUS(a2,b2); iop15; \
+ PLUS(a3,b3); PLUS(a4,b4); iop16; \
+ XOR(tmp1,d1,a1); XOR(tmp2,d2,a2); iop17; \
+ XOR(tmp3,d3,a3); XOR(tmp4,d4,a4); iop18; \
+ ROTATE4_8(d1, d2, d3, d4, tmp1, tmp2, tmp3, tmp4, \
+ _(iop19), _(iop20), _(iop21)); iop22; \
+ PLUS(c1,d1); PLUS(c2,d2); iop23; \
+ PLUS(c3,d3); PLUS(c4,d4); iop24; \
+ XOR(tmp1,b1,c1); XOR(tmp2,b2,c2); iop25; \
+ XOR(tmp3,b3,c3); XOR(tmp4,b4,c4); iop26; \
+ ROTATE4(b1, b2, b3, b4, 7, tmp1, tmp2, tmp3, tmp4, \
+ _(iop27), _(iop28), _(iop29));
+
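+/* One QUARTERROUND4 invocation performs a full column (or diagonal) round:
+ * every Xn register holds word n of four independent blocks, so two
+ * invocations advance all four blocks by one double round.  The iop* slots
+ * are filled with Poly1305 instructions by the stitched entry point below. */
+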
+.align 4
+.globl _gcry_chacha20_aarch64_blocks4_data_inc_counter
+_gcry_chacha20_aarch64_blocks4_data_inc_counter:
+ .long 0,1,2,3
+
+.align 4
+.globl _gcry_chacha20_aarch64_blocks4_data_rot8
+_gcry_chacha20_aarch64_blocks4_data_rot8:
+ .byte 3,0,1,2
+ .byte 7,4,5,6
+ .byte 11,8,9,10
+ .byte 15,12,13,14
+
+.align 3
+.globl _gcry_chacha20_aarch64_blocks4
+ELF(.type _gcry_chacha20_aarch64_blocks4,%function;)
+
+_gcry_chacha20_aarch64_blocks4:
+ /* input:
+ * x0: input
+ * x1: dst
+ * x2: src
+ * x3: nblks (multiple of 4)
+ */
+ CFI_STARTPROC()
+
+ GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
+ add INPUT_CTR, INPUT, #(12*4);
+ ld1 {ROT8.16b}, [CTR];
+ GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
+ mov INPUT_POS, INPUT;
+ ld1 {VCTR.16b}, [CTR];
+
+.Loop4:
+ /* Construct counter vectors X12 and X13 */
+
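+	/* The 32-bit low counter word may wrap when lane offsets 0..3 are
+	 * added: cmhi flags lanes where the sum became smaller than the
+	 * offset, and subtracting that all-ones mask from X13 carries +1
+	 * into the high counter word. */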
+ ld1 {X15.16b}, [INPUT_CTR];
+ mov ROUND, #20;
+ ld1 {VTMP1.16b-VTMP3.16b}, [INPUT_POS];
+
+ dup X12.4s, X15.s[0];
+ dup X13.4s, X15.s[1];
+ ldr CTR, [INPUT_CTR];
+ add X12.4s, X12.4s, VCTR.4s;
+ dup X0.4s, VTMP1.s[0];
+ dup X1.4s, VTMP1.s[1];
+ dup X2.4s, VTMP1.s[2];
+ dup X3.4s, VTMP1.s[3];
+ dup X14.4s, X15.s[2];
+ cmhi VTMP0.4s, VCTR.4s, X12.4s;
+ dup X15.4s, X15.s[3];
+ add CTR, CTR, #4; /* Update counter */
+ dup X4.4s, VTMP2.s[0];
+ dup X5.4s, VTMP2.s[1];
+ dup X6.4s, VTMP2.s[2];
+ dup X7.4s, VTMP2.s[3];
+ sub X13.4s, X13.4s, VTMP0.4s;
+ dup X8.4s, VTMP3.s[0];
+ dup X9.4s, VTMP3.s[1];
+ dup X10.4s, VTMP3.s[2];
+ dup X11.4s, VTMP3.s[3];
+ mov X12_TMP.16b, X12.16b;
+ mov X13_TMP.16b, X13.16b;
+ str CTR, [INPUT_CTR];
+
+.Lround2:
+ subs ROUND, ROUND, #2
+ QUARTERROUND4(X0, X4, X8, X12, X1, X5, X9, X13,
+ X2, X6, X10, X14, X3, X7, X11, X15,
+ tmp:=,VTMP0,VTMP1,VTMP2,VTMP3,
+ ,,,,,,,,,,,,,,,,,,,,,,,,,,,,)
+ QUARTERROUND4(X0, X5, X10, X15, X1, X6, X11, X12,
+ X2, X7, X8, X13, X3, X4, X9, X14,
+ tmp:=,VTMP0,VTMP1,VTMP2,VTMP3,
+ ,,,,,,,,,,,,,,,,,,,,,,,,,,,,)
+ b.ne .Lround2;
+
+ ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS], #32;
+
+ PLUS(X12, X12_TMP); /* INPUT + 12 * 4 + counter */
+ PLUS(X13, X13_TMP); /* INPUT + 13 * 4 + counter */
+
+ dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 0 * 4 */
+ dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 1 * 4 */
+ dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 2 * 4 */
+ dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 3 * 4 */
+ PLUS(X0, VTMP2);
+ PLUS(X1, VTMP3);
+ PLUS(X2, X12_TMP);
+ PLUS(X3, X13_TMP);
+
+ dup VTMP2.4s, VTMP1.s[0]; /* INPUT + 4 * 4 */
+ dup VTMP3.4s, VTMP1.s[1]; /* INPUT + 5 * 4 */
+ dup X12_TMP.4s, VTMP1.s[2]; /* INPUT + 6 * 4 */
+ dup X13_TMP.4s, VTMP1.s[3]; /* INPUT + 7 * 4 */
+ ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS];
+ mov INPUT_POS, INPUT;
+ PLUS(X4, VTMP2);
+ PLUS(X5, VTMP3);
+ PLUS(X6, X12_TMP);
+ PLUS(X7, X13_TMP);
+
+ dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 8 * 4 */
+ dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 9 * 4 */
+ dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 10 * 4 */
+ dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 11 * 4 */
+ dup VTMP0.4s, VTMP1.s[2]; /* INPUT + 14 * 4 */
+ dup VTMP1.4s, VTMP1.s[3]; /* INPUT + 15 * 4 */
+ PLUS(X8, VTMP2);
+ PLUS(X9, VTMP3);
+ PLUS(X10, X12_TMP);
+ PLUS(X11, X13_TMP);
+ PLUS(X14, VTMP0);
+ PLUS(X15, VTMP1);
+
+ transpose_4x4(X0, X1, X2, X3, VTMP0, VTMP1, VTMP2);
+ transpose_4x4(X4, X5, X6, X7, VTMP0, VTMP1, VTMP2);
+ transpose_4x4(X8, X9, X10, X11, VTMP0, VTMP1, VTMP2);
+ transpose_4x4(X12, X13, X14, X15, VTMP0, VTMP1, VTMP2);
+
+ subs NBLKS, NBLKS, #4;
+
+ ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64;
+ ld1 {X12_TMP.16b-X13_TMP.16b}, [SRC], #32;
+ eor VTMP0.16b, X0.16b, VTMP0.16b;
+ eor VTMP1.16b, X4.16b, VTMP1.16b;
+ eor VTMP2.16b, X8.16b, VTMP2.16b;
+ eor VTMP3.16b, X12.16b, VTMP3.16b;
+ eor X12_TMP.16b, X1.16b, X12_TMP.16b;
+ eor X13_TMP.16b, X5.16b, X13_TMP.16b;
+ st1 {VTMP0.16b-VTMP3.16b}, [DST], #64;
+ ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64;
+ st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32;
+ ld1 {X12_TMP.16b-X13_TMP.16b}, [SRC], #32;
+ eor VTMP0.16b, X9.16b, VTMP0.16b;
+ eor VTMP1.16b, X13.16b, VTMP1.16b;
+ eor VTMP2.16b, X2.16b, VTMP2.16b;
+ eor VTMP3.16b, X6.16b, VTMP3.16b;
+ eor X12_TMP.16b, X10.16b, X12_TMP.16b;
+ eor X13_TMP.16b, X14.16b, X13_TMP.16b;
+ st1 {VTMP0.16b-VTMP3.16b}, [DST], #64;
+ ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64;
+ st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32;
+ eor VTMP0.16b, X3.16b, VTMP0.16b;
+ eor VTMP1.16b, X7.16b, VTMP1.16b;
+ eor VTMP2.16b, X11.16b, VTMP2.16b;
+ eor VTMP3.16b, X15.16b, VTMP3.16b;
+ st1 {VTMP0.16b-VTMP3.16b}, [DST], #64;
+
+ b.ne .Loop4;
+
+ /* clear the used vector registers and stack */
+ clear(VTMP0);
+ clear(VTMP1);
+ clear(VTMP2);
+ clear(VTMP3);
+ clear(X12_TMP);
+ clear(X13_TMP);
+ clear(X0);
+ clear(X1);
+ clear(X2);
+ clear(X3);
+ clear(X4);
+ clear(X5);
+ clear(X6);
+ clear(X7);
+ clear(X8);
+ clear(X9);
+ clear(X10);
+ clear(X11);
+ clear(X12);
+ clear(X13);
+ clear(X14);
+ clear(X15);
+
+ eor x0, x0, x0
+ ret
+ CFI_ENDPROC()
+ELF(.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4;)
+
+/**********************************************************************
+ 4-way stitched chacha20-poly1305
+ **********************************************************************/
+
+.align 3
+.globl _gcry_chacha20_poly1305_aarch64_blocks4
+ELF(.type _gcry_chacha20_poly1305_aarch64_blocks4,%function;)
+
+_gcry_chacha20_poly1305_aarch64_blocks4:
+ /* input:
+ * x0: input
+ * x1: dst
+ * x2: src
+ * x3: nblks (multiple of 4)
+ * x4: poly1305-state
+ * x5: poly1305-src
+ */
+ CFI_STARTPROC()
+ POLY1305_PUSH_REGS()
+
+ mov POLY_RSTATE, x4;
+ mov POLY_RSRC, x5;
+
+ GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
+ add INPUT_CTR, INPUT, #(12*4);
+ ld1 {ROT8.16b}, [CTR];
+ GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
+ mov INPUT_POS, INPUT;
+ ld1 {VCTR.16b}, [CTR];
+
+ POLY1305_LOAD_STATE()
+
+.Loop_poly4:
+ /* Construct counter vectors X12 and X13 */
+
+ ld1 {X15.16b}, [INPUT_CTR];
+ ld1 {VTMP1.16b-VTMP3.16b}, [INPUT_POS];
+
+ dup X12.4s, X15.s[0];
+ dup X13.4s, X15.s[1];
+ ldr CTR, [INPUT_CTR];
+ add X12.4s, X12.4s, VCTR.4s;
+ dup X0.4s, VTMP1.s[0];
+ dup X1.4s, VTMP1.s[1];
+ dup X2.4s, VTMP1.s[2];
+ dup X3.4s, VTMP1.s[3];
+ dup X14.4s, X15.s[2];
+ cmhi VTMP0.4s, VCTR.4s, X12.4s;
+ dup X15.4s, X15.s[3];
+ add CTR, CTR, #4; /* Update counter */
+ dup X4.4s, VTMP2.s[0];
+ dup X5.4s, VTMP2.s[1];
+ dup X6.4s, VTMP2.s[2];
+ dup X7.4s, VTMP2.s[3];
+ sub X13.4s, X13.4s, VTMP0.4s;
+ dup X8.4s, VTMP3.s[0];
+ dup X9.4s, VTMP3.s[1];
+ dup X10.4s, VTMP3.s[2];
+ dup X11.4s, VTMP3.s[3];
+ mov X12_TMP.16b, X12.16b;
+ mov X13_TMP.16b, X13.16b;
+ str CTR, [INPUT_CTR];
+
+ mov ROUND, #20
+.Lround4_with_poly1305_outer:
+ mov POLY_CHACHA_ROUND, #6;
+.Lround4_with_poly1305_inner1:
+ POLY1305_BLOCK_PART1(0 * 16)
+ QUARTERROUND4(X0, X4, X8, X12, X1, X5, X9, X13,
+ X2, X6, X10, X14, X3, X7, X11, X15,
+ tmp:=,VTMP0,VTMP1,VTMP2,VTMP3,
+ POLY1305_BLOCK_PART2(0 * 16),
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4(),
+ POLY1305_BLOCK_PART5(),
+ POLY1305_BLOCK_PART6(),
+ POLY1305_BLOCK_PART7(),
+ POLY1305_BLOCK_PART8(),
+ POLY1305_BLOCK_PART9(),
+ POLY1305_BLOCK_PART10(),
+ POLY1305_BLOCK_PART11(),
+ POLY1305_BLOCK_PART12(),
+ POLY1305_BLOCK_PART13(),
+ POLY1305_BLOCK_PART14(),
+ POLY1305_BLOCK_PART15(),
+ POLY1305_BLOCK_PART16(),
+ POLY1305_BLOCK_PART17(),
+ POLY1305_BLOCK_PART18(),
+ POLY1305_BLOCK_PART19(),
+ POLY1305_BLOCK_PART20(),
+ POLY1305_BLOCK_PART21(),
+ POLY1305_BLOCK_PART22(),
+ POLY1305_BLOCK_PART23(),
+ POLY1305_BLOCK_PART24(),
+ POLY1305_BLOCK_PART25(),
+ POLY1305_BLOCK_PART26(),
+ POLY1305_BLOCK_PART27(),
+ POLY1305_BLOCK_PART28(),
+ POLY1305_BLOCK_PART29(),
+ POLY1305_BLOCK_PART1(1 * 16))
+ POLY1305_BLOCK_PART2(1 * 16)
+ QUARTERROUND4(X0, X5, X10, X15, X1, X6, X11, X12,
+ X2, X7, X8, X13, X3, X4, X9, X14,
+ tmp:=,VTMP0,VTMP1,VTMP2,VTMP3,
+ _(add POLY_RSRC, POLY_RSRC, #(2*16)),
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4(),
+ POLY1305_BLOCK_PART5(),
+ POLY1305_BLOCK_PART6(),
+ POLY1305_BLOCK_PART7(),
+ POLY1305_BLOCK_PART8(),
+ POLY1305_BLOCK_PART9(),
+ POLY1305_BLOCK_PART10(),
+ POLY1305_BLOCK_PART11(),
+ POLY1305_BLOCK_PART12(),
+ POLY1305_BLOCK_PART13(),
+ POLY1305_BLOCK_PART14(),
+ POLY1305_BLOCK_PART15(),
+ POLY1305_BLOCK_PART16(),
+ POLY1305_BLOCK_PART17(),
+ POLY1305_BLOCK_PART18(),
+ POLY1305_BLOCK_PART19(),
+ POLY1305_BLOCK_PART20(),
+ POLY1305_BLOCK_PART21(),
+ POLY1305_BLOCK_PART22(),
+ POLY1305_BLOCK_PART23(),
+ POLY1305_BLOCK_PART24(),
+ POLY1305_BLOCK_PART25(),
+ POLY1305_BLOCK_PART26(),
+ POLY1305_BLOCK_PART27(),
+ POLY1305_BLOCK_PART28(),
+ POLY1305_BLOCK_PART29(),
+ _(subs POLY_CHACHA_ROUND, POLY_CHACHA_ROUND, #2));
+ b.ne .Lround4_with_poly1305_inner1;
+
+ mov POLY_CHACHA_ROUND, #4;
+.Lround4_with_poly1305_inner2:
+ POLY1305_BLOCK_PART1(0 * 16)
+ QUARTERROUND4(X0, X4, X8, X12, X1, X5, X9, X13,
+ X2, X6, X10, X14, X3, X7, X11, X15,
+ tmp:=,VTMP0,VTMP1,VTMP2,VTMP3,,
+ POLY1305_BLOCK_PART2(0 * 16),,
+ _(add POLY_RSRC, POLY_RSRC, #(1*16)),,
+ POLY1305_BLOCK_PART3(),,
+ POLY1305_BLOCK_PART4(),,
+ POLY1305_BLOCK_PART5(),,
+ POLY1305_BLOCK_PART6(),,
+ POLY1305_BLOCK_PART7(),,
+ POLY1305_BLOCK_PART8(),,
+ POLY1305_BLOCK_PART9(),,
+ POLY1305_BLOCK_PART10(),,
+ POLY1305_BLOCK_PART11(),,
+ POLY1305_BLOCK_PART12(),,
+ POLY1305_BLOCK_PART13(),,
+ POLY1305_BLOCK_PART14(),)
+ POLY1305_BLOCK_PART15()
+ QUARTERROUND4(X0, X5, X10, X15, X1, X6, X11, X12,
+ X2, X7, X8, X13, X3, X4, X9, X14,
+ tmp:=,VTMP0,VTMP1,VTMP2,VTMP3,
+ POLY1305_BLOCK_PART16(),,
+ POLY1305_BLOCK_PART17(),,
+ POLY1305_BLOCK_PART18(),,
+ POLY1305_BLOCK_PART19(),,
+ POLY1305_BLOCK_PART20(),,
+ POLY1305_BLOCK_PART21(),,
+ POLY1305_BLOCK_PART22(),,
+ POLY1305_BLOCK_PART23(),,
+ POLY1305_BLOCK_PART24(),,
+ POLY1305_BLOCK_PART25(),,
+ POLY1305_BLOCK_PART26(),,
+ POLY1305_BLOCK_PART27(),,
+ POLY1305_BLOCK_PART28(),,
+ POLY1305_BLOCK_PART29(),
+ _(subs POLY_CHACHA_ROUND, POLY_CHACHA_ROUND, #2),)
+ b.ne .Lround4_with_poly1305_inner2;
+
+ subs ROUND, ROUND, #10
+ b.ne .Lround4_with_poly1305_outer;
+
+ ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS], #32;
+
+ PLUS(X12, X12_TMP); /* INPUT + 12 * 4 + counter */
+ PLUS(X13, X13_TMP); /* INPUT + 13 * 4 + counter */
+
+ dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 0 * 4 */
+ dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 1 * 4 */
+ dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 2 * 4 */
+ dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 3 * 4 */
+ PLUS(X0, VTMP2);
+ PLUS(X1, VTMP3);
+ PLUS(X2, X12_TMP);
+ PLUS(X3, X13_TMP);
+
+ dup VTMP2.4s, VTMP1.s[0]; /* INPUT + 4 * 4 */
+ dup VTMP3.4s, VTMP1.s[1]; /* INPUT + 5 * 4 */
+ dup X12_TMP.4s, VTMP1.s[2]; /* INPUT + 6 * 4 */
+ dup X13_TMP.4s, VTMP1.s[3]; /* INPUT + 7 * 4 */
+ ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS];
+ mov INPUT_POS, INPUT;
+ PLUS(X4, VTMP2);
+ PLUS(X5, VTMP3);
+ PLUS(X6, X12_TMP);
+ PLUS(X7, X13_TMP);
+
+ dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 8 * 4 */
+ dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 9 * 4 */
+ dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 10 * 4 */
+ dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 11 * 4 */
+ dup VTMP0.4s, VTMP1.s[2]; /* INPUT + 14 * 4 */
+ dup VTMP1.4s, VTMP1.s[3]; /* INPUT + 15 * 4 */
+ PLUS(X8, VTMP2);
+ PLUS(X9, VTMP3);
+ PLUS(X10, X12_TMP);
+ PLUS(X11, X13_TMP);
+ PLUS(X14, VTMP0);
+ PLUS(X15, VTMP1);
+
+ transpose_4x4(X0, X1, X2, X3, VTMP0, VTMP1, VTMP2);
+ transpose_4x4(X4, X5, X6, X7, VTMP0, VTMP1, VTMP2);
+ transpose_4x4(X8, X9, X10, X11, VTMP0, VTMP1, VTMP2);
+ transpose_4x4(X12, X13, X14, X15, VTMP0, VTMP1, VTMP2);
+
+ subs NBLKS, NBLKS, #4;
+
+ ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64;
+ ld1 {X12_TMP.16b-X13_TMP.16b}, [SRC], #32;
+ eor VTMP0.16b, X0.16b, VTMP0.16b;
+ eor VTMP1.16b, X4.16b, VTMP1.16b;
+ eor VTMP2.16b, X8.16b, VTMP2.16b;
+ eor VTMP3.16b, X12.16b, VTMP3.16b;
+ eor X12_TMP.16b, X1.16b, X12_TMP.16b;
+ eor X13_TMP.16b, X5.16b, X13_TMP.16b;
+ st1 {VTMP0.16b-VTMP3.16b}, [DST], #64;
+ ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64;
+ st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32;
+ ld1 {X12_TMP.16b-X13_TMP.16b}, [SRC], #32;
+ eor VTMP0.16b, X9.16b, VTMP0.16b;
+ eor VTMP1.16b, X13.16b, VTMP1.16b;
+ eor VTMP2.16b, X2.16b, VTMP2.16b;
+ eor VTMP3.16b, X6.16b, VTMP3.16b;
+ eor X12_TMP.16b, X10.16b, X12_TMP.16b;
+ eor X13_TMP.16b, X14.16b, X13_TMP.16b;
+ st1 {VTMP0.16b-VTMP3.16b}, [DST], #64;
+ ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64;
+ st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32;
+ eor VTMP0.16b, X3.16b, VTMP0.16b;
+ eor VTMP1.16b, X7.16b, VTMP1.16b;
+ eor VTMP2.16b, X11.16b, VTMP2.16b;
+ eor VTMP3.16b, X15.16b, VTMP3.16b;
+ st1 {VTMP0.16b-VTMP3.16b}, [DST], #64;
+
+ b.ne .Loop_poly4;
+
+ POLY1305_STORE_STATE()
+
+ /* clear the used vector registers and stack */
+ clear(VTMP0);
+ clear(VTMP1);
+ clear(VTMP2);
+ clear(VTMP3);
+ clear(X12_TMP);
+ clear(X13_TMP);
+ clear(X0);
+ clear(X1);
+ clear(X2);
+ clear(X3);
+ clear(X4);
+ clear(X5);
+ clear(X6);
+ clear(X7);
+ clear(X8);
+ clear(X9);
+ clear(X10);
+ clear(X11);
+ clear(X12);
+ clear(X13);
+ clear(X14);
+ clear(X15);
+
+ eor x0, x0, x0
+ POLY1305_POP_REGS()
+ ret
+ CFI_ENDPROC()
+ELF(.size _gcry_chacha20_poly1305_aarch64_blocks4, .-_gcry_chacha20_poly1305_aarch64_blocks4;)
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/chacha20-amd64-avx2.S b/comm/third_party/libgcrypt/cipher/chacha20-amd64-avx2.S
new file mode 100644
index 0000000000..51e107be83
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/chacha20-amd64-avx2.S
@@ -0,0 +1,601 @@
+/* chacha20-amd64-avx2.S - AVX2 implementation of ChaCha20 cipher
+ *
+ * Copyright (C) 2017-2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Based on D. J. Bernstein reference implementation at
+ * http://cr.yp.to/chacha.html:
+ *
+ * chacha-regs.c version 20080118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+.text
+
+#include "asm-common-amd64.h"
+#include "asm-poly1305-amd64.h"
+
+/* register macros */
+#define INPUT %rdi
+#define DST %rsi
+#define SRC %rdx
+#define NBLKS %rcx
+#define ROUND %eax
+
+/* stack structure */
+#define STACK_VEC_X12 (32)
+#define STACK_VEC_X13 (32 + STACK_VEC_X12)
+#define STACK_TMP (32 + STACK_VEC_X13)
+#define STACK_TMP1 (32 + STACK_TMP)
+
+#define STACK_MAX (32 + STACK_TMP1)
+
+/* vector registers */
+#define X0 %ymm0
+#define X1 %ymm1
+#define X2 %ymm2
+#define X3 %ymm3
+#define X4 %ymm4
+#define X5 %ymm5
+#define X6 %ymm6
+#define X7 %ymm7
+#define X8 %ymm8
+#define X9 %ymm9
+#define X10 %ymm10
+#define X11 %ymm11
+#define X12 %ymm12
+#define X13 %ymm13
+#define X14 %ymm14
+#define X15 %ymm15
+
+#define X0h %xmm0
+#define X1h %xmm1
+#define X2h %xmm2
+#define X3h %xmm3
+#define X4h %xmm4
+#define X5h %xmm5
+#define X6h %xmm6
+#define X7h %xmm7
+#define X8h %xmm8
+#define X9h %xmm9
+#define X10h %xmm10
+#define X11h %xmm11
+#define X12h %xmm12
+#define X13h %xmm13
+#define X14h %xmm14
+#define X15h %xmm15
+
+/**********************************************************************
+ helper macros
+ **********************************************************************/
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4(x0,x1,x2,x3,t1,t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+/* 2x2 128-bit matrix transpose */
+#define transpose_16byte_2x2(x0,x1,t1) \
+ vmovdqa x0, t1; \
+ vperm2i128 $0x20, x1, x0, x0; \
+ vperm2i128 $0x31, x1, t1, x1;
+
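+/* In the 8-way code each ymm register holds one state word from eight
+ * blocks (four per 128-bit lane), so output is rearranged in two steps:
+ * transpose_4x4 reorders 32-bit words within each 128-bit lane and
+ * transpose_16byte_2x2 then recombines the 128-bit lanes with vperm2i128. */
+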
+/* xor register with unaligned src and save to unaligned dst */
+#define xor_src_dst(dst, src, offset, xreg) \
+ vpxor offset(src), xreg, xreg; \
+ vmovdqu xreg, offset(dst);
+
+/**********************************************************************
+ 8-way chacha20
+ **********************************************************************/
+
+#define ROTATE2(v1,v2,c,tmp) \
+ vpsrld $(32 - (c)), v1, tmp; \
+ vpslld $(c), v1, v1; \
+ vpaddb tmp, v1, v1; \
+ vpsrld $(32 - (c)), v2, tmp; \
+ vpslld $(c), v2, v2; \
+ vpaddb tmp, v2, v2;
+
+#define ROTATE_SHUF_2(v1,v2,shuf) \
+ vpshufb shuf, v1, v1; \
+ vpshufb shuf, v2, v2;
+
+#define XOR(ds,s) \
+ vpxor s, ds, ds;
+
+#define PLUS(ds,s) \
+ vpaddd s, ds, ds;
+
+#define QUARTERROUND2(a1,b1,c1,d1,a2,b2,c2,d2,ign,tmp1,\
+ interleave_op1,interleave_op2,\
+ interleave_op3,interleave_op4) \
+ vbroadcasti128 .Lshuf_rol16 rRIP, tmp1; \
+ interleave_op1; \
+ PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2); \
+ ROTATE_SHUF_2(d1, d2, tmp1); \
+ interleave_op2; \
+ PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2); \
+ ROTATE2(b1, b2, 12, tmp1); \
+ vbroadcasti128 .Lshuf_rol8 rRIP, tmp1; \
+ interleave_op3; \
+ PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2); \
+ ROTATE_SHUF_2(d1, d2, tmp1); \
+ interleave_op4; \
+ PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2); \
+ ROTATE2(b1, b2, 7, tmp1);
+
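+/* QUARTERROUND2 advances two column/diagonal groups at once.  The 16- and
+ * 8-bit rotates use vpshufb byte-permutation tables (.Lshuf_rol16 and
+ * .Lshuf_rol8); the 12- and 7-bit rotates use a shift pair whose halves are
+ * merged with an add, which acts as OR because the shifted bit ranges do
+ * not overlap.  The interleave_op slots take Poly1305 instructions in the
+ * stitched variant. */
+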
+.align 32
+chacha20_data:
+.Lshuf_rol16:
+ .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
+.Lshuf_rol8:
+ .byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
+.Linc_counter:
+ .byte 0,1,2,3,4,5,6,7
+.Lunsigned_cmp:
+ .long 0x80000000
+
+.align 8
+.globl _gcry_chacha20_amd64_avx2_blocks8
+ELF(.type _gcry_chacha20_amd64_avx2_blocks8,@function;)
+
+_gcry_chacha20_amd64_avx2_blocks8:
+ /* input:
+ * %rdi: input
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: nblks (multiple of 8)
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ subq $STACK_MAX, %rsp;
+ andq $~31, %rsp;
+
+.Loop8:
+ mov $20, ROUND;
+
+ /* Construct counter vectors X12 and X13 */
+ vpmovzxbd .Linc_counter rRIP, X0;
+ vpbroadcastd .Lunsigned_cmp rRIP, X2;
+ vpbroadcastd (12 * 4)(INPUT), X12;
+ vpbroadcastd (13 * 4)(INPUT), X13;
+ vpaddd X0, X12, X12;
+ vpxor X2, X0, X0;
+ vpxor X2, X12, X1;
+ vpcmpgtd X1, X0, X0;
+ vpsubd X0, X13, X13;
+ vmovdqa X12, (STACK_VEC_X12)(%rsp);
+ vmovdqa X13, (STACK_VEC_X13)(%rsp);
+
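+	/* Overflow of the low counter word is detected by flipping the sign
+	 * bit (.Lunsigned_cmp) of both operands and using a signed compare,
+	 * which is equivalent to an unsigned compare; subtracting the
+	 * resulting all-ones mask carries +1 into the high word in X13. */
+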
+ /* Load vectors */
+ vpbroadcastd (0 * 4)(INPUT), X0;
+ vpbroadcastd (1 * 4)(INPUT), X1;
+ vpbroadcastd (2 * 4)(INPUT), X2;
+ vpbroadcastd (3 * 4)(INPUT), X3;
+ vpbroadcastd (4 * 4)(INPUT), X4;
+ vpbroadcastd (5 * 4)(INPUT), X5;
+ vpbroadcastd (6 * 4)(INPUT), X6;
+ vpbroadcastd (7 * 4)(INPUT), X7;
+ vpbroadcastd (8 * 4)(INPUT), X8;
+ vpbroadcastd (9 * 4)(INPUT), X9;
+ vpbroadcastd (10 * 4)(INPUT), X10;
+ vpbroadcastd (11 * 4)(INPUT), X11;
+ vpbroadcastd (14 * 4)(INPUT), X14;
+ vpbroadcastd (15 * 4)(INPUT), X15;
+ vmovdqa X15, (STACK_TMP)(%rsp);
+
+.Lround2:
+ QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X15,,,,)
+ vmovdqa (STACK_TMP)(%rsp), X15;
+ vmovdqa X8, (STACK_TMP)(%rsp);
+ QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8,,,,)
+ QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8,,,,)
+ vmovdqa (STACK_TMP)(%rsp), X8;
+ vmovdqa X15, (STACK_TMP)(%rsp);
+ QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X15,,,,)
+ sub $2, ROUND;
+ jnz .Lround2;
+
+ vmovdqa X8, (STACK_TMP1)(%rsp);
+
+ /* tmp := X15 */
+ vpbroadcastd (0 * 4)(INPUT), X15;
+ PLUS(X0, X15);
+ vpbroadcastd (1 * 4)(INPUT), X15;
+ PLUS(X1, X15);
+ vpbroadcastd (2 * 4)(INPUT), X15;
+ PLUS(X2, X15);
+ vpbroadcastd (3 * 4)(INPUT), X15;
+ PLUS(X3, X15);
+ vpbroadcastd (4 * 4)(INPUT), X15;
+ PLUS(X4, X15);
+ vpbroadcastd (5 * 4)(INPUT), X15;
+ PLUS(X5, X15);
+ vpbroadcastd (6 * 4)(INPUT), X15;
+ PLUS(X6, X15);
+ vpbroadcastd (7 * 4)(INPUT), X15;
+ PLUS(X7, X15);
+ transpose_4x4(X0, X1, X2, X3, X8, X15);
+ transpose_4x4(X4, X5, X6, X7, X8, X15);
+ vmovdqa (STACK_TMP1)(%rsp), X8;
+ transpose_16byte_2x2(X0, X4, X15);
+ transpose_16byte_2x2(X1, X5, X15);
+ transpose_16byte_2x2(X2, X6, X15);
+ transpose_16byte_2x2(X3, X7, X15);
+ vmovdqa (STACK_TMP)(%rsp), X15;
+ xor_src_dst(DST, SRC, (64 * 0 + 16 * 0), X0);
+ xor_src_dst(DST, SRC, (64 * 1 + 16 * 0), X1);
+ vpbroadcastd (8 * 4)(INPUT), X0;
+ PLUS(X8, X0);
+ vpbroadcastd (9 * 4)(INPUT), X0;
+ PLUS(X9, X0);
+ vpbroadcastd (10 * 4)(INPUT), X0;
+ PLUS(X10, X0);
+ vpbroadcastd (11 * 4)(INPUT), X0;
+ PLUS(X11, X0);
+ vmovdqa (STACK_VEC_X12)(%rsp), X0;
+ PLUS(X12, X0);
+ vmovdqa (STACK_VEC_X13)(%rsp), X0;
+ PLUS(X13, X0);
+ vpbroadcastd (14 * 4)(INPUT), X0;
+ PLUS(X14, X0);
+ vpbroadcastd (15 * 4)(INPUT), X0;
+ PLUS(X15, X0);
+ xor_src_dst(DST, SRC, (64 * 2 + 16 * 0), X2);
+ xor_src_dst(DST, SRC, (64 * 3 + 16 * 0), X3);
+
+ /* Update counter */
+ addq $8, (12 * 4)(INPUT);
+
+ transpose_4x4(X8, X9, X10, X11, X0, X1);
+ transpose_4x4(X12, X13, X14, X15, X0, X1);
+ xor_src_dst(DST, SRC, (64 * 4 + 16 * 0), X4);
+ xor_src_dst(DST, SRC, (64 * 5 + 16 * 0), X5);
+ transpose_16byte_2x2(X8, X12, X0);
+ transpose_16byte_2x2(X9, X13, X0);
+ transpose_16byte_2x2(X10, X14, X0);
+ transpose_16byte_2x2(X11, X15, X0);
+ xor_src_dst(DST, SRC, (64 * 6 + 16 * 0), X6);
+ xor_src_dst(DST, SRC, (64 * 7 + 16 * 0), X7);
+ xor_src_dst(DST, SRC, (64 * 0 + 16 * 2), X8);
+ xor_src_dst(DST, SRC, (64 * 1 + 16 * 2), X9);
+ xor_src_dst(DST, SRC, (64 * 2 + 16 * 2), X10);
+ xor_src_dst(DST, SRC, (64 * 3 + 16 * 2), X11);
+ xor_src_dst(DST, SRC, (64 * 4 + 16 * 2), X12);
+ xor_src_dst(DST, SRC, (64 * 5 + 16 * 2), X13);
+ xor_src_dst(DST, SRC, (64 * 6 + 16 * 2), X14);
+ xor_src_dst(DST, SRC, (64 * 7 + 16 * 2), X15);
+
+ sub $8, NBLKS;
+ lea (8 * 64)(DST), DST;
+ lea (8 * 64)(SRC), SRC;
+ jnz .Loop8;
+
+ /* clear the used vector registers and stack */
+ vpxor X0, X0, X0;
+ vmovdqa X0, (STACK_VEC_X12)(%rsp);
+ vmovdqa X0, (STACK_VEC_X13)(%rsp);
+ vmovdqa X0, (STACK_TMP)(%rsp);
+ vmovdqa X0, (STACK_TMP1)(%rsp);
+ vzeroall;
+
+ /* eax zeroed by round loop. */
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_chacha20_amd64_avx2_blocks8,
+ .-_gcry_chacha20_amd64_avx2_blocks8;)
+
+/**********************************************************************
+ 8-way stitched chacha20-poly1305
+ **********************************************************************/
+
+#define _ /*_*/
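+
+/* '_' expands to nothing; it is passed for interleave slots that carry no
+ * Poly1305 instruction in a given round. */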
+
+.align 8
+.globl _gcry_chacha20_poly1305_amd64_avx2_blocks8
+ELF(.type _gcry_chacha20_poly1305_amd64_avx2_blocks8,@function;)
+
+_gcry_chacha20_poly1305_amd64_avx2_blocks8:
+ /* input:
+ * %rdi: input
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: nblks (multiple of 8)
+ * %r9: poly1305-state
+ * %r8: poly1305-src
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ vzeroupper;
+
+ subq $(9 * 8) + STACK_MAX + 32, %rsp;
+ andq $~31, %rsp;
+
+ movq %rbx, (STACK_MAX + 0 * 8)(%rsp);
+ movq %r12, (STACK_MAX + 1 * 8)(%rsp);
+ movq %r13, (STACK_MAX + 2 * 8)(%rsp);
+ movq %r14, (STACK_MAX + 3 * 8)(%rsp);
+ movq %r15, (STACK_MAX + 4 * 8)(%rsp);
+ CFI_REG_ON_STACK(rbx, STACK_MAX + 0 * 8);
+ CFI_REG_ON_STACK(r12, STACK_MAX + 1 * 8);
+ CFI_REG_ON_STACK(r13, STACK_MAX + 2 * 8);
+ CFI_REG_ON_STACK(r14, STACK_MAX + 3 * 8);
+ CFI_REG_ON_STACK(r15, STACK_MAX + 4 * 8);
+
+ movq %rdx, (STACK_MAX + 5 * 8)(%rsp); # SRC
+ movq %rsi, (STACK_MAX + 6 * 8)(%rsp); # DST
+ movq %rcx, (STACK_MAX + 7 * 8)(%rsp); # NBLKS
+
+ /* Load state */
+ POLY1305_LOAD_STATE();
+
+.Loop_poly8:
+
+ /* Construct counter vectors X12 and X13 */
+ vpmovzxbd .Linc_counter rRIP, X0;
+ vpbroadcastd .Lunsigned_cmp rRIP, X2;
+ vpbroadcastd (12 * 4)(INPUT), X12;
+ vpbroadcastd (13 * 4)(INPUT), X13;
+ vpaddd X0, X12, X12;
+ vpxor X2, X0, X0;
+ vpxor X2, X12, X1;
+ vpcmpgtd X1, X0, X0;
+ vpsubd X0, X13, X13;
+ vmovdqa X12, (STACK_VEC_X12)(%rsp);
+ vmovdqa X13, (STACK_VEC_X13)(%rsp);
+
+ /* Load vectors */
+ vpbroadcastd (0 * 4)(INPUT), X0;
+ vpbroadcastd (1 * 4)(INPUT), X1;
+ vpbroadcastd (2 * 4)(INPUT), X2;
+ vpbroadcastd (3 * 4)(INPUT), X3;
+ vpbroadcastd (4 * 4)(INPUT), X4;
+ vpbroadcastd (5 * 4)(INPUT), X5;
+ vpbroadcastd (6 * 4)(INPUT), X6;
+ vpbroadcastd (7 * 4)(INPUT), X7;
+ vpbroadcastd (8 * 4)(INPUT), X8;
+ vpbroadcastd (9 * 4)(INPUT), X9;
+ vpbroadcastd (10 * 4)(INPUT), X10;
+ vpbroadcastd (11 * 4)(INPUT), X11;
+ vpbroadcastd (14 * 4)(INPUT), X14;
+ vpbroadcastd (15 * 4)(INPUT), X15;
+ vmovdqa X15, (STACK_TMP)(%rsp);
+
+ /* Process eight ChaCha20 blocks and 32 Poly1305 blocks. */
+
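+	/* Each 64-byte ChaCha20 block covers four 16-byte Poly1305 blocks.
+	 * Per outer iteration the first inner loop runs three passes of two
+	 * rounds and four Poly1305 blocks, the second runs two passes of two
+	 * rounds and two Poly1305 blocks; two outer iterations give the full
+	 * 20 rounds and 32 Poly1305 blocks. */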
+ movl $20, (STACK_MAX + 8 * 8 + 4)(%rsp);
+.Lround8_with_poly1305_outer:
+ movl $6, (STACK_MAX + 8 * 8)(%rsp);
+.Lround8_with_poly1305_inner1:
+ /* rounds 0-5 & 10-15 */
+ POLY1305_BLOCK_PART1(0 * 16)
+ QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X15,
+ POLY1305_BLOCK_PART2(),
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4(),
+ POLY1305_BLOCK_PART5())
+ vmovdqa (STACK_TMP)(%rsp), X15;
+ vmovdqa X8, (STACK_TMP)(%rsp);
+ POLY1305_BLOCK_PART1(1 * 16)
+ QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8,
+ POLY1305_BLOCK_PART2(),
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4(),
+ POLY1305_BLOCK_PART5())
+ POLY1305_BLOCK_PART1(2 * 16)
+ QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8,
+ POLY1305_BLOCK_PART2(),
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4(),
+ POLY1305_BLOCK_PART5())
+ vmovdqa (STACK_TMP)(%rsp), X8;
+ vmovdqa X15, (STACK_TMP)(%rsp);
+ POLY1305_BLOCK_PART1(3 * 16)
+ lea (4 * 16)(POLY_RSRC), POLY_RSRC;
+ QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X15,
+ POLY1305_BLOCK_PART2(),
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4(),
+ POLY1305_BLOCK_PART5())
+
+ subl $2, (STACK_MAX + 8 * 8)(%rsp);
+ jnz .Lround8_with_poly1305_inner1;
+
+ movl $4, (STACK_MAX + 8 * 8)(%rsp);
+.Lround8_with_poly1305_inner2:
+ /* rounds 6-9 & 16-19 */
+ POLY1305_BLOCK_PART1(0 * 16)
+ QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X15,
+ POLY1305_BLOCK_PART2(),
+ _,
+ POLY1305_BLOCK_PART3(),
+ _)
+ vmovdqa (STACK_TMP)(%rsp), X15;
+ vmovdqa X8, (STACK_TMP)(%rsp);
+ QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8,
+ _,
+ POLY1305_BLOCK_PART4(),
+ _,
+ POLY1305_BLOCK_PART5())
+ POLY1305_BLOCK_PART1(1 * 16);
+ lea (2 * 16)(POLY_RSRC), POLY_RSRC;
+ QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8,
+ _,
+ POLY1305_BLOCK_PART2(),
+ _,
+ POLY1305_BLOCK_PART3())
+ vmovdqa (STACK_TMP)(%rsp), X8;
+ vmovdqa X15, (STACK_TMP)(%rsp);
+ QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X15,
+ POLY1305_BLOCK_PART4(),
+ _,
+ POLY1305_BLOCK_PART5(),
+ _)
+
+ subl $2, (STACK_MAX + 8 * 8)(%rsp);
+ jnz .Lround8_with_poly1305_inner2;
+
+ subl $10, (STACK_MAX + 8 * 8 + 4)(%rsp);
+ jnz .Lround8_with_poly1305_outer;
+
+ movq (STACK_MAX + 5 * 8)(%rsp), SRC;
+ movq (STACK_MAX + 6 * 8)(%rsp), DST;
+
+ vmovdqa X8, (STACK_TMP1)(%rsp);
+
+ /* tmp := X15 */
+ vpbroadcastd (0 * 4)(INPUT), X15;
+ PLUS(X0, X15);
+ vpbroadcastd (1 * 4)(INPUT), X15;
+ PLUS(X1, X15);
+ vpbroadcastd (2 * 4)(INPUT), X15;
+ PLUS(X2, X15);
+ vpbroadcastd (3 * 4)(INPUT), X15;
+ PLUS(X3, X15);
+ vpbroadcastd (4 * 4)(INPUT), X15;
+ PLUS(X4, X15);
+ vpbroadcastd (5 * 4)(INPUT), X15;
+ PLUS(X5, X15);
+ vpbroadcastd (6 * 4)(INPUT), X15;
+ PLUS(X6, X15);
+ vpbroadcastd (7 * 4)(INPUT), X15;
+ PLUS(X7, X15);
+ transpose_4x4(X0, X1, X2, X3, X8, X15);
+ transpose_4x4(X4, X5, X6, X7, X8, X15);
+ vmovdqa (STACK_TMP1)(%rsp), X8;
+ transpose_16byte_2x2(X0, X4, X15);
+ transpose_16byte_2x2(X1, X5, X15);
+ transpose_16byte_2x2(X2, X6, X15);
+ transpose_16byte_2x2(X3, X7, X15);
+ vmovdqa (STACK_TMP)(%rsp), X15;
+ xor_src_dst(DST, SRC, (64 * 0 + 16 * 0), X0);
+ xor_src_dst(DST, SRC, (64 * 1 + 16 * 0), X1);
+ vpbroadcastd (8 * 4)(INPUT), X0;
+ PLUS(X8, X0);
+ vpbroadcastd (9 * 4)(INPUT), X0;
+ PLUS(X9, X0);
+ vpbroadcastd (10 * 4)(INPUT), X0;
+ PLUS(X10, X0);
+ vpbroadcastd (11 * 4)(INPUT), X0;
+ PLUS(X11, X0);
+ vmovdqa (STACK_VEC_X12)(%rsp), X0;
+ PLUS(X12, X0);
+ vmovdqa (STACK_VEC_X13)(%rsp), X0;
+ PLUS(X13, X0);
+ vpbroadcastd (14 * 4)(INPUT), X0;
+ PLUS(X14, X0);
+ vpbroadcastd (15 * 4)(INPUT), X0;
+ PLUS(X15, X0);
+ xor_src_dst(DST, SRC, (64 * 2 + 16 * 0), X2);
+ xor_src_dst(DST, SRC, (64 * 3 + 16 * 0), X3);
+
+ /* Update counter */
+ addq $8, (12 * 4)(INPUT);
+
+ transpose_4x4(X8, X9, X10, X11, X0, X1);
+ transpose_4x4(X12, X13, X14, X15, X0, X1);
+ xor_src_dst(DST, SRC, (64 * 4 + 16 * 0), X4);
+ xor_src_dst(DST, SRC, (64 * 5 + 16 * 0), X5);
+ transpose_16byte_2x2(X8, X12, X0);
+ transpose_16byte_2x2(X9, X13, X0);
+ transpose_16byte_2x2(X10, X14, X0);
+ transpose_16byte_2x2(X11, X15, X0);
+ xor_src_dst(DST, SRC, (64 * 6 + 16 * 0), X6);
+ xor_src_dst(DST, SRC, (64 * 7 + 16 * 0), X7);
+ xor_src_dst(DST, SRC, (64 * 0 + 16 * 2), X8);
+ xor_src_dst(DST, SRC, (64 * 1 + 16 * 2), X9);
+ xor_src_dst(DST, SRC, (64 * 2 + 16 * 2), X10);
+ xor_src_dst(DST, SRC, (64 * 3 + 16 * 2), X11);
+ xor_src_dst(DST, SRC, (64 * 4 + 16 * 2), X12);
+ xor_src_dst(DST, SRC, (64 * 5 + 16 * 2), X13);
+ xor_src_dst(DST, SRC, (64 * 6 + 16 * 2), X14);
+ xor_src_dst(DST, SRC, (64 * 7 + 16 * 2), X15);
+
+ subq $8, (STACK_MAX + 7 * 8)(%rsp); # NBLKS
+
+ lea (8 * 64)(DST), DST;
+ lea (8 * 64)(SRC), SRC;
+ movq SRC, (STACK_MAX + 5 * 8)(%rsp);
+ movq DST, (STACK_MAX + 6 * 8)(%rsp);
+
+ jnz .Loop_poly8;
+
+ /* Store state */
+ POLY1305_STORE_STATE();
+
+ /* clear the used vector registers and stack */
+ vpxor X0, X0, X0;
+ vmovdqa X0, (STACK_VEC_X12)(%rsp);
+ vmovdqa X0, (STACK_VEC_X13)(%rsp);
+ vmovdqa X0, (STACK_TMP)(%rsp);
+ vmovdqa X0, (STACK_TMP1)(%rsp);
+ vzeroall;
+
+ movq (STACK_MAX + 0 * 8)(%rsp), %rbx;
+ movq (STACK_MAX + 1 * 8)(%rsp), %r12;
+ movq (STACK_MAX + 2 * 8)(%rsp), %r13;
+ movq (STACK_MAX + 3 * 8)(%rsp), %r14;
+ movq (STACK_MAX + 4 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
+
+ xorl %eax, %eax;
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_chacha20_poly1305_amd64_avx2_blocks8,
+ .-_gcry_chacha20_poly1305_amd64_avx2_blocks8;)
+
+#endif /*HAVE_GCC_INLINE_ASM_AVX2 && (HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS || HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/chacha20-amd64-ssse3.S b/comm/third_party/libgcrypt/cipher/chacha20-amd64-ssse3.S
new file mode 100644
index 0000000000..9cdb69ae6d
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/chacha20-amd64-ssse3.S
@@ -0,0 +1,1012 @@
+/* chacha20-amd64-ssse3.S - SSSE3 implementation of ChaCha20 cipher
+ *
+ * Copyright (C) 2017-2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Based on D. J. Bernstein reference implementation at
+ * http://cr.yp.to/chacha.html:
+ *
+ * chacha-regs.c version 20080118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+.text
+
+#include "asm-common-amd64.h"
+#include "asm-poly1305-amd64.h"
+
+/* register macros */
+#define INPUT %rdi
+#define DST %rsi
+#define SRC %rdx
+#define NBLKS %rcx
+#define ROUND %eax
+
+/* stack structure */
+#define STACK_VEC_X12 (16)
+#define STACK_VEC_X13 (16 + STACK_VEC_X12)
+#define STACK_TMP (16 + STACK_VEC_X13)
+#define STACK_TMP1 (16 + STACK_TMP)
+#define STACK_TMP2 (16 + STACK_TMP1)
+
+#define STACK_MAX (16 + STACK_TMP2)
+
+/* vector registers */
+#define X0 %xmm0
+#define X1 %xmm1
+#define X2 %xmm2
+#define X3 %xmm3
+#define X4 %xmm4
+#define X5 %xmm5
+#define X6 %xmm6
+#define X7 %xmm7
+#define X8 %xmm8
+#define X9 %xmm9
+#define X10 %xmm10
+#define X11 %xmm11
+#define X12 %xmm12
+#define X13 %xmm13
+#define X14 %xmm14
+#define X15 %xmm15
+
+/**********************************************************************
+ helper macros
+ **********************************************************************/
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
+ movdqa x0, t2; \
+ punpckhdq x1, t2; \
+ punpckldq x1, x0; \
+ \
+ movdqa x2, t1; \
+ punpckldq x3, t1; \
+ punpckhdq x3, x2; \
+ \
+ movdqa x0, x1; \
+ punpckhqdq t1, x1; \
+ punpcklqdq t1, x0; \
+ \
+ movdqa t2, x3; \
+ punpckhqdq x2, x3; \
+ punpcklqdq x2, t2; \
+ movdqa t2, x2;
+
+/* fill xmm register with 32-bit value from memory */
+#define pbroadcastd(mem32, xreg) \
+ movd mem32, xreg; \
+ pshufd $0, xreg, xreg;
+
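+/* SSSE3 lacks a broadcast instruction, so pbroadcastd loads the 32-bit word
+ * with movd and replicates it across all four lanes with pshufd. */
+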
+/* xor with unaligned memory operand */
+#define pxor_u(umem128, xreg, t) \
+ movdqu umem128, t; \
+ pxor t, xreg;
+
+/* xor register with unaligned src and save to unaligned dst */
+#define xor_src_dst(dst, src, offset, xreg, t) \
+ pxor_u(offset(src), xreg, t); \
+ movdqu xreg, offset(dst);
+
+#define clear(x) pxor x,x;
+
+/**********************************************************************
+ 4-way chacha20
+ **********************************************************************/
+
+#define ROTATE2(v1,v2,c,tmp1,tmp2) \
+ movdqa v1, tmp1; \
+ movdqa v2, tmp2; \
+ psrld $(32 - (c)), v1; \
+ pslld $(c), tmp1; \
+ paddb tmp1, v1; \
+ psrld $(32 - (c)), v2; \
+ pslld $(c), tmp2; \
+ paddb tmp2, v2;
+
+#define ROTATE_SHUF_2(v1,v2,shuf) \
+ pshufb shuf, v1; \
+ pshufb shuf, v2;
+
+#define XOR(ds,s) \
+ pxor s, ds;
+
+#define PLUS(ds,s) \
+ paddd s, ds;
+
+#define QUARTERROUND2(a1,b1,c1,d1,a2,b2,c2,d2,ign,tmp1,tmp2,\
+ interleave_op1,interleave_op2) \
+ movdqa .Lshuf_rol16 rRIP, tmp1; \
+ interleave_op1; \
+ PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2); \
+ ROTATE_SHUF_2(d1, d2, tmp1); \
+ PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2); \
+ ROTATE2(b1, b2, 12, tmp1, tmp2); \
+ movdqa .Lshuf_rol8 rRIP, tmp1; \
+ interleave_op2; \
+ PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2); \
+ ROTATE_SHUF_2(d1, d2, tmp1); \
+ PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2); \
+ ROTATE2(b1, b2, 7, tmp1, tmp2);
+
+chacha20_data:
+.align 16
+.Lshuf_rol16:
+ .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
+.Lshuf_rol8:
+ .byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
+.Lcounter1:
+ .long 1,0,0,0
+.Linc_counter:
+ .long 0,1,2,3
+.Lunsigned_cmp:
+ .long 0x80000000,0x80000000,0x80000000,0x80000000
+
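+/* .Lshuf_rol16 and .Lshuf_rol8 are pshufb byte-permutation tables for the
+ * 16- and 8-bit rotates, .Lcounter1 adds one to the 64-bit block counter,
+ * .Linc_counter offsets the four parallel block counters, and
+ * .Lunsigned_cmp is the sign bias used to emulate unsigned compares with
+ * pcmpgtd. */
+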
+.align 8
+.globl _gcry_chacha20_amd64_ssse3_blocks4
+ELF(.type _gcry_chacha20_amd64_ssse3_blocks4,@function;)
+
+_gcry_chacha20_amd64_ssse3_blocks4:
+ /* input:
+ * %rdi: input
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: nblks (multiple of 4)
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ subq $STACK_MAX, %rsp;
+ andq $~15, %rsp;
+
+.Loop4:
+ mov $20, ROUND;
+
+ /* Construct counter vectors X12 and X13 */
+ movdqa .Linc_counter rRIP, X0;
+ movdqa .Lunsigned_cmp rRIP, X2;
+ pbroadcastd((12 * 4)(INPUT), X12);
+ pbroadcastd((13 * 4)(INPUT), X13);
+ paddd X0, X12;
+ movdqa X12, X1;
+ pxor X2, X0;
+ pxor X2, X1;
+ pcmpgtd X1, X0;
+ psubd X0, X13;
+ movdqa X12, (STACK_VEC_X12)(%rsp);
+ movdqa X13, (STACK_VEC_X13)(%rsp);
+
+ /* Load vectors */
+ pbroadcastd((0 * 4)(INPUT), X0);
+ pbroadcastd((1 * 4)(INPUT), X1);
+ pbroadcastd((2 * 4)(INPUT), X2);
+ pbroadcastd((3 * 4)(INPUT), X3);
+ pbroadcastd((4 * 4)(INPUT), X4);
+ pbroadcastd((5 * 4)(INPUT), X5);
+ pbroadcastd((6 * 4)(INPUT), X6);
+ pbroadcastd((7 * 4)(INPUT), X7);
+ pbroadcastd((8 * 4)(INPUT), X8);
+ pbroadcastd((9 * 4)(INPUT), X9);
+ pbroadcastd((10 * 4)(INPUT), X10);
+ pbroadcastd((11 * 4)(INPUT), X11);
+ pbroadcastd((14 * 4)(INPUT), X14);
+ pbroadcastd((15 * 4)(INPUT), X15);
+ movdqa X11, (STACK_TMP)(%rsp);
+ movdqa X15, (STACK_TMP1)(%rsp);
+
+.Lround2_4:
+ QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X11,X15,,)
+ movdqa (STACK_TMP)(%rsp), X11;
+ movdqa (STACK_TMP1)(%rsp), X15;
+ movdqa X8, (STACK_TMP)(%rsp);
+ movdqa X9, (STACK_TMP1)(%rsp);
+ QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8,X9,,)
+ QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8,X9,,)
+ movdqa (STACK_TMP)(%rsp), X8;
+ movdqa (STACK_TMP1)(%rsp), X9;
+ movdqa X11, (STACK_TMP)(%rsp);
+ movdqa X15, (STACK_TMP1)(%rsp);
+ QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X11,X15,,)
+ sub $2, ROUND;
+ jnz .Lround2_4;
+
+ /* tmp := X15 */
+ movdqa (STACK_TMP)(%rsp), X11;
+ pbroadcastd((0 * 4)(INPUT), X15);
+ PLUS(X0, X15);
+ pbroadcastd((1 * 4)(INPUT), X15);
+ PLUS(X1, X15);
+ pbroadcastd((2 * 4)(INPUT), X15);
+ PLUS(X2, X15);
+ pbroadcastd((3 * 4)(INPUT), X15);
+ PLUS(X3, X15);
+ pbroadcastd((4 * 4)(INPUT), X15);
+ PLUS(X4, X15);
+ pbroadcastd((5 * 4)(INPUT), X15);
+ PLUS(X5, X15);
+ pbroadcastd((6 * 4)(INPUT), X15);
+ PLUS(X6, X15);
+ pbroadcastd((7 * 4)(INPUT), X15);
+ PLUS(X7, X15);
+ pbroadcastd((8 * 4)(INPUT), X15);
+ PLUS(X8, X15);
+ pbroadcastd((9 * 4)(INPUT), X15);
+ PLUS(X9, X15);
+ pbroadcastd((10 * 4)(INPUT), X15);
+ PLUS(X10, X15);
+ pbroadcastd((11 * 4)(INPUT), X15);
+ PLUS(X11, X15);
+ movdqa (STACK_VEC_X12)(%rsp), X15;
+ PLUS(X12, X15);
+ movdqa (STACK_VEC_X13)(%rsp), X15;
+ PLUS(X13, X15);
+ movdqa X13, (STACK_TMP)(%rsp);
+ pbroadcastd((14 * 4)(INPUT), X15);
+ PLUS(X14, X15);
+ movdqa (STACK_TMP1)(%rsp), X15;
+ movdqa X14, (STACK_TMP1)(%rsp);
+ pbroadcastd((15 * 4)(INPUT), X13);
+ PLUS(X15, X13);
+ movdqa X15, (STACK_TMP2)(%rsp);
+
+ /* Update counter */
+ addq $4, (12 * 4)(INPUT);
+
+ transpose_4x4(X0, X1, X2, X3, X13, X14, X15);
+ xor_src_dst(DST, SRC, (64 * 0 + 16 * 0), X0, X15);
+ xor_src_dst(DST, SRC, (64 * 1 + 16 * 0), X1, X15);
+ xor_src_dst(DST, SRC, (64 * 2 + 16 * 0), X2, X15);
+ xor_src_dst(DST, SRC, (64 * 3 + 16 * 0), X3, X15);
+ transpose_4x4(X4, X5, X6, X7, X0, X1, X2);
+ movdqa (STACK_TMP)(%rsp), X13;
+ movdqa (STACK_TMP1)(%rsp), X14;
+ movdqa (STACK_TMP2)(%rsp), X15;
+ xor_src_dst(DST, SRC, (64 * 0 + 16 * 1), X4, X0);
+ xor_src_dst(DST, SRC, (64 * 1 + 16 * 1), X5, X0);
+ xor_src_dst(DST, SRC, (64 * 2 + 16 * 1), X6, X0);
+ xor_src_dst(DST, SRC, (64 * 3 + 16 * 1), X7, X0);
+ transpose_4x4(X8, X9, X10, X11, X0, X1, X2);
+ xor_src_dst(DST, SRC, (64 * 0 + 16 * 2), X8, X0);
+ xor_src_dst(DST, SRC, (64 * 1 + 16 * 2), X9, X0);
+ xor_src_dst(DST, SRC, (64 * 2 + 16 * 2), X10, X0);
+ xor_src_dst(DST, SRC, (64 * 3 + 16 * 2), X11, X0);
+ transpose_4x4(X12, X13, X14, X15, X0, X1, X2);
+ xor_src_dst(DST, SRC, (64 * 0 + 16 * 3), X12, X0);
+ xor_src_dst(DST, SRC, (64 * 1 + 16 * 3), X13, X0);
+ xor_src_dst(DST, SRC, (64 * 2 + 16 * 3), X14, X0);
+ xor_src_dst(DST, SRC, (64 * 3 + 16 * 3), X15, X0);
+
+ sub $4, NBLKS;
+ lea (4 * 64)(DST), DST;
+ lea (4 * 64)(SRC), SRC;
+ jnz .Loop4;
+
+ /* clear the used vector registers and stack */
+ clear(X0);
+ movdqa X0, (STACK_VEC_X12)(%rsp);
+ movdqa X0, (STACK_VEC_X13)(%rsp);
+ movdqa X0, (STACK_TMP)(%rsp);
+ movdqa X0, (STACK_TMP1)(%rsp);
+ movdqa X0, (STACK_TMP2)(%rsp);
+ clear(X1);
+ clear(X2);
+ clear(X3);
+ clear(X4);
+ clear(X5);
+ clear(X6);
+ clear(X7);
+ clear(X8);
+ clear(X9);
+ clear(X10);
+ clear(X11);
+ clear(X12);
+ clear(X13);
+ clear(X14);
+ clear(X15);
+
+ /* eax zeroed by round loop. */
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_chacha20_amd64_ssse3_blocks4,
+ .-_gcry_chacha20_amd64_ssse3_blocks4;)
+
+/**********************************************************************
+ 2-way && 1-way chacha20
+ **********************************************************************/
+
+#define ROTATE_SHUF(v1,shuf) \
+ pshufb shuf, v1;
+
+#define ROTATE(v1,c,tmp1) \
+ movdqa v1, tmp1; \
+ psrld $(32 - (c)), v1; \
+ pslld $(c), tmp1; \
+ paddb tmp1, v1;
+
+#define WORD_SHUF(v1,shuf) \
+ pshufd $shuf, v1, v1;
+
+#define QUARTERROUND4(x0,x1,x2,x3,shuf_rol8,shuf_rol16,tmp1,shuf_x1,\
+ shuf_x2,shuf_x3) \
+ PLUS(x0, x1); XOR(x3, x0); ROTATE_SHUF(x3, shuf_rol16); \
+ PLUS(x2, x3); XOR(x1, x2); ROTATE(x1, 12, tmp1); \
+ PLUS(x0, x1); XOR(x3, x0); ROTATE_SHUF(x3, shuf_rol8); \
+ PLUS(x2, x3); \
+ WORD_SHUF(x3, shuf_x3); \
+ XOR(x1, x2); \
+ WORD_SHUF(x2, shuf_x2); \
+ ROTATE(x1, 7, tmp1); \
+ WORD_SHUF(x1, shuf_x1);
+
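+/* In the 1/2-way code the whole state lives in four registers, one row
+ * each, so this QUARTERROUND4 works on full rows; the pshufd word rotations
+ * (0x39, 0x4e, 0x93) move the rows between column and diagonal orientation
+ * instead of switching register operands. */
+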
+.align 8
+.globl _gcry_chacha20_amd64_ssse3_blocks1
+ELF(.type _gcry_chacha20_amd64_ssse3_blocks1,@function;)
+
+_gcry_chacha20_amd64_ssse3_blocks1:
+ /* input:
+ * %rdi: input
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: nblks
+ */
+ CFI_STARTPROC();
+
+ /* Load constants */
+ movdqa .Lcounter1 rRIP, X4;
+ movdqa .Lshuf_rol8 rRIP, X5;
+ movdqa .Lshuf_rol16 rRIP, X6;
+
+ /* Load state */
+ movdqu (0 * 4)(INPUT), X10;
+ movdqu (4 * 4)(INPUT), X11;
+ movdqu (8 * 4)(INPUT), X12;
+ movdqu (12 * 4)(INPUT), X13;
+
+ cmp $2, NBLKS;
+ jb .Loop1;
+
+ mov $20, ROUND;
+
+ movdqa X10, X0;
+ movdqa X11, X1;
+ movdqa X12, X2;
+ movdqa X13, X3;
+
+ movdqa X10, X8;
+ movdqa X11, X9;
+ movdqa X12, X14;
+ movdqa X13, X15;
+ paddq X4, X15;
+
+.Lround2_2:
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93);
+ QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x39, 0x4e, 0x93);
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39);
+ QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x93, 0x4e, 0x39);
+ sub $2, ROUND;
+ jnz .Lround2_2;
+
+ PLUS(X0, X10);
+ PLUS(X1, X11);
+ PLUS(X2, X12);
+ PLUS(X3, X13);
+
+ /* Update counter */
+ paddq X4, X13;
+
+ PLUS(X8, X10);
+ PLUS(X9, X11);
+ PLUS(X14, X12);
+ PLUS(X15, X13);
+
+ /* Update counter */
+ paddq X4, X13;
+
+ xor_src_dst(DST, SRC, 0 * 4, X0, X7);
+ xor_src_dst(DST, SRC, 4 * 4, X1, X7);
+ xor_src_dst(DST, SRC, 8 * 4, X2, X7);
+ xor_src_dst(DST, SRC, 12 * 4, X3, X7);
+ xor_src_dst(DST, SRC, 16 * 4, X8, X7);
+ xor_src_dst(DST, SRC, 20 * 4, X9, X7);
+ xor_src_dst(DST, SRC, 24 * 4, X14, X7);
+ xor_src_dst(DST, SRC, 28 * 4, X15, X7);
+
+ lea (2 * 64)(DST), DST;
+ lea (2 * 64)(SRC), SRC;
+
+ clear(X8);
+ clear(X9);
+ clear(X14);
+ clear(X15);
+
+ sub $2, NBLKS;
+ jz .Ldone1;
+
+.Loop1:
+ mov $20, ROUND;
+
+ movdqa X10, X0;
+ movdqa X11, X1;
+ movdqa X12, X2;
+ movdqa X13, X3;
+
+.Lround2_1:
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93);
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39);
+ sub $2, ROUND;
+ jnz .Lround2_1;
+
+ PLUS(X0, X10);
+ PLUS(X1, X11);
+ PLUS(X2, X12);
+ PLUS(X3, X13);
+
+ /* Update counter */
+ paddq X4, X13;
+
+ xor_src_dst(DST, SRC, 0 * 4, X0, X7);
+ xor_src_dst(DST, SRC, 4 * 4, X1, X7);
+ xor_src_dst(DST, SRC, 8 * 4, X2, X7);
+ xor_src_dst(DST, SRC, 12 * 4, X3, X7);
+
+ lea (64)(DST), DST;
+ lea (64)(SRC), SRC;
+
+ sub $1, NBLKS;
+ jnz .Loop1;
+
+.Ldone1:
+ /* Store counter */
+ movdqu X13, (12 * 4)(INPUT);
+
+ /* clear the used vector registers */
+ clear(X0);
+ clear(X1);
+ clear(X2);
+ clear(X3);
+ clear(X4);
+ clear(X5);
+ clear(X6);
+ clear(X7);
+ clear(X10);
+ clear(X11);
+ clear(X12);
+ clear(X13);
+
+ /* eax zeroed by round loop. */
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_chacha20_amd64_ssse3_blocks1,
+ .-_gcry_chacha20_amd64_ssse3_blocks1;)
+
+/**********************************************************************
+ 4-way stitched chacha20-poly1305
+ **********************************************************************/
+
+#define _ /*_*/
+
+.align 8
+.globl _gcry_chacha20_poly1305_amd64_ssse3_blocks4
+ELF(.type _gcry_chacha20_poly1305_amd64_ssse3_blocks4,@function;)
+
+_gcry_chacha20_poly1305_amd64_ssse3_blocks4:
+ /* input:
+ * %rdi: input
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: nblks (multiple of 4)
+ * %r9: poly1305-state
+ * %r8: poly1305-src
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ subq $(9 * 8) + STACK_MAX + 16, %rsp;
+ andq $~15, %rsp;
+
+ movq %rbx, (STACK_MAX + 0 * 8)(%rsp);
+ movq %r12, (STACK_MAX + 1 * 8)(%rsp);
+ movq %r13, (STACK_MAX + 2 * 8)(%rsp);
+ movq %r14, (STACK_MAX + 3 * 8)(%rsp);
+ movq %r15, (STACK_MAX + 4 * 8)(%rsp);
+ CFI_REG_ON_STACK(rbx, STACK_MAX + 0 * 8);
+ CFI_REG_ON_STACK(r12, STACK_MAX + 1 * 8);
+ CFI_REG_ON_STACK(r13, STACK_MAX + 2 * 8);
+ CFI_REG_ON_STACK(r14, STACK_MAX + 3 * 8);
+ CFI_REG_ON_STACK(r15, STACK_MAX + 4 * 8);
+
+ movq %rdx, (STACK_MAX + 5 * 8)(%rsp); # SRC
+ movq %rsi, (STACK_MAX + 6 * 8)(%rsp); # DST
+ movq %rcx, (STACK_MAX + 7 * 8)(%rsp); # NBLKS
+
+ /* Load state */
+ POLY1305_LOAD_STATE();
+
+.Loop_poly4:
+
+ /* Construct counter vectors X12 and X13 */
+ movdqa .Linc_counter rRIP, X0;
+ movdqa .Lunsigned_cmp rRIP, X2;
+ pbroadcastd((12 * 4)(INPUT), X12);
+ pbroadcastd((13 * 4)(INPUT), X13);
+ paddd X0, X12;
+ movdqa X12, X1;
+ pxor X2, X0;
+ pxor X2, X1;
+ pcmpgtd X1, X0;
+ psubd X0, X13;
+ movdqa X12, (STACK_VEC_X12)(%rsp);
+ movdqa X13, (STACK_VEC_X13)(%rsp);
+
+ /* Load vectors */
+ pbroadcastd((0 * 4)(INPUT), X0);
+ pbroadcastd((1 * 4)(INPUT), X1);
+ pbroadcastd((2 * 4)(INPUT), X2);
+ pbroadcastd((3 * 4)(INPUT), X3);
+ pbroadcastd((4 * 4)(INPUT), X4);
+ pbroadcastd((5 * 4)(INPUT), X5);
+ pbroadcastd((6 * 4)(INPUT), X6);
+ pbroadcastd((7 * 4)(INPUT), X7);
+ pbroadcastd((8 * 4)(INPUT), X8);
+ pbroadcastd((9 * 4)(INPUT), X9);
+ pbroadcastd((10 * 4)(INPUT), X10);
+ pbroadcastd((11 * 4)(INPUT), X11);
+ pbroadcastd((14 * 4)(INPUT), X14);
+ pbroadcastd((15 * 4)(INPUT), X15);
+ movdqa X11, (STACK_TMP)(%rsp);
+ movdqa X15, (STACK_TMP1)(%rsp);
+
+ /* Process four ChaCha20 blocks and sixteen Poly1305 blocks. */
+
+ movl $20, (STACK_MAX + 8 * 8 + 4)(%rsp);
+.Lround4_with_poly1305_outer:
+ movl $6, (STACK_MAX + 8 * 8)(%rsp);
+.Lround4_with_poly1305_inner1:
+ /* rounds 0-5 & 10-15 */
+ POLY1305_BLOCK_PART1(0 * 16)
+ QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X11,X15,
+ POLY1305_BLOCK_PART2(),
+ POLY1305_BLOCK_PART3())
+ movdqa (STACK_TMP)(%rsp), X11;
+ movdqa (STACK_TMP1)(%rsp), X15;
+ movdqa X8, (STACK_TMP)(%rsp);
+ movdqa X9, (STACK_TMP1)(%rsp);
+ QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8,X9,
+ POLY1305_BLOCK_PART4(),
+ POLY1305_BLOCK_PART5())
+ POLY1305_BLOCK_PART1(1 * 16)
+ lea (2 * 16)(POLY_RSRC), POLY_RSRC;
+ QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8,X9,
+ POLY1305_BLOCK_PART2(),
+ POLY1305_BLOCK_PART3())
+ movdqa (STACK_TMP)(%rsp), X8;
+ movdqa (STACK_TMP1)(%rsp), X9;
+ movdqa X11, (STACK_TMP)(%rsp);
+ movdqa X15, (STACK_TMP1)(%rsp);
+ QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X11,X15,
+ POLY1305_BLOCK_PART4(),
+ POLY1305_BLOCK_PART5())
+
+ subl $2, (STACK_MAX + 8 * 8)(%rsp);
+ jnz .Lround4_with_poly1305_inner1;
+
+ movl $4, (STACK_MAX + 8 * 8)(%rsp);
+.Lround4_with_poly1305_inner2:
+ /* rounds 6-9 & 16-19 */
+ POLY1305_BLOCK_PART1(0 * 16)
+ lea (1 * 16)(POLY_RSRC), POLY_RSRC;
+ QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X11,X15,
+ POLY1305_BLOCK_PART2(),
+ _)
+ movdqa (STACK_TMP)(%rsp), X11;
+ movdqa (STACK_TMP1)(%rsp), X15;
+ movdqa X8, (STACK_TMP)(%rsp);
+ movdqa X9, (STACK_TMP1)(%rsp);
+ QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8,X9,
+ POLY1305_BLOCK_PART3(),
+ _)
+ QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8,X9,
+ POLY1305_BLOCK_PART4(),
+ _)
+ movdqa (STACK_TMP)(%rsp), X8;
+ movdqa (STACK_TMP1)(%rsp), X9;
+ movdqa X11, (STACK_TMP)(%rsp);
+ movdqa X15, (STACK_TMP1)(%rsp);
+ QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X11,X15,
+ POLY1305_BLOCK_PART5(),
+ _)
+
+ subl $2, (STACK_MAX + 8 * 8)(%rsp);
+ jnz .Lround4_with_poly1305_inner2;
+
+ subl $10, (STACK_MAX + 8 * 8 + 4)(%rsp);
+ jnz .Lround4_with_poly1305_outer;
+
+ /* tmp := X15 */
+ movdqa (STACK_TMP)(%rsp), X11;
+ pbroadcastd((0 * 4)(INPUT), X15);
+ PLUS(X0, X15);
+ pbroadcastd((1 * 4)(INPUT), X15);
+ PLUS(X1, X15);
+ pbroadcastd((2 * 4)(INPUT), X15);
+ PLUS(X2, X15);
+ pbroadcastd((3 * 4)(INPUT), X15);
+ PLUS(X3, X15);
+ pbroadcastd((4 * 4)(INPUT), X15);
+ PLUS(X4, X15);
+ pbroadcastd((5 * 4)(INPUT), X15);
+ PLUS(X5, X15);
+ pbroadcastd((6 * 4)(INPUT), X15);
+ PLUS(X6, X15);
+ pbroadcastd((7 * 4)(INPUT), X15);
+ PLUS(X7, X15);
+ pbroadcastd((8 * 4)(INPUT), X15);
+ PLUS(X8, X15);
+ pbroadcastd((9 * 4)(INPUT), X15);
+ PLUS(X9, X15);
+ pbroadcastd((10 * 4)(INPUT), X15);
+ PLUS(X10, X15);
+ pbroadcastd((11 * 4)(INPUT), X15);
+ PLUS(X11, X15);
+ movdqa (STACK_VEC_X12)(%rsp), X15;
+ PLUS(X12, X15);
+ movdqa (STACK_VEC_X13)(%rsp), X15;
+ PLUS(X13, X15);
+ movdqa X13, (STACK_TMP)(%rsp);
+ pbroadcastd((14 * 4)(INPUT), X15);
+ PLUS(X14, X15);
+ movdqa (STACK_TMP1)(%rsp), X15;
+ movdqa X14, (STACK_TMP1)(%rsp);
+ pbroadcastd((15 * 4)(INPUT), X13);
+ PLUS(X15, X13);
+ movdqa X15, (STACK_TMP2)(%rsp);
+
+ /* Update counter */
+ addq $4, (12 * 4)(INPUT);
+
+ movq (STACK_MAX + 5 * 8)(%rsp), SRC;
+ movq (STACK_MAX + 6 * 8)(%rsp), DST;
+
+ transpose_4x4(X0, X1, X2, X3, X13, X14, X15);
+ xor_src_dst(DST, SRC, (64 * 0 + 16 * 0), X0, X15);
+ xor_src_dst(DST, SRC, (64 * 1 + 16 * 0), X1, X15);
+ xor_src_dst(DST, SRC, (64 * 2 + 16 * 0), X2, X15);
+ xor_src_dst(DST, SRC, (64 * 3 + 16 * 0), X3, X15);
+ transpose_4x4(X4, X5, X6, X7, X0, X1, X2);
+ movdqa (STACK_TMP)(%rsp), X13;
+ movdqa (STACK_TMP1)(%rsp), X14;
+ movdqa (STACK_TMP2)(%rsp), X15;
+ xor_src_dst(DST, SRC, (64 * 0 + 16 * 1), X4, X0);
+ xor_src_dst(DST, SRC, (64 * 1 + 16 * 1), X5, X0);
+ xor_src_dst(DST, SRC, (64 * 2 + 16 * 1), X6, X0);
+ xor_src_dst(DST, SRC, (64 * 3 + 16 * 1), X7, X0);
+ transpose_4x4(X8, X9, X10, X11, X0, X1, X2);
+ xor_src_dst(DST, SRC, (64 * 0 + 16 * 2), X8, X0);
+ xor_src_dst(DST, SRC, (64 * 1 + 16 * 2), X9, X0);
+ xor_src_dst(DST, SRC, (64 * 2 + 16 * 2), X10, X0);
+ xor_src_dst(DST, SRC, (64 * 3 + 16 * 2), X11, X0);
+ transpose_4x4(X12, X13, X14, X15, X0, X1, X2);
+ xor_src_dst(DST, SRC, (64 * 0 + 16 * 3), X12, X0);
+ xor_src_dst(DST, SRC, (64 * 1 + 16 * 3), X13, X0);
+ xor_src_dst(DST, SRC, (64 * 2 + 16 * 3), X14, X0);
+ xor_src_dst(DST, SRC, (64 * 3 + 16 * 3), X15, X0);
+
+ subq $4, (STACK_MAX + 7 * 8)(%rsp); # NBLKS
+
+ lea (4 * 64)(DST), DST;
+ lea (4 * 64)(SRC), SRC;
+ movq SRC, (STACK_MAX + 5 * 8)(%rsp);
+ movq DST, (STACK_MAX + 6 * 8)(%rsp);
+
+ jnz .Loop_poly4;
+
+ /* Store state */
+ POLY1305_STORE_STATE();
+
+ /* clear the used vector registers and stack */
+ clear(X0);
+ movdqa X0, (STACK_VEC_X12)(%rsp);
+ movdqa X0, (STACK_VEC_X13)(%rsp);
+ movdqa X0, (STACK_TMP)(%rsp);
+ movdqa X0, (STACK_TMP1)(%rsp);
+ movdqa X0, (STACK_TMP2)(%rsp);
+ clear(X1);
+ clear(X2);
+ clear(X3);
+ clear(X4);
+ clear(X5);
+ clear(X6);
+ clear(X7);
+ clear(X8);
+ clear(X9);
+ clear(X10);
+ clear(X11);
+ clear(X12);
+ clear(X13);
+ clear(X14);
+ clear(X15);
+
+ movq (STACK_MAX + 0 * 8)(%rsp), %rbx;
+ movq (STACK_MAX + 1 * 8)(%rsp), %r12;
+ movq (STACK_MAX + 2 * 8)(%rsp), %r13;
+ movq (STACK_MAX + 3 * 8)(%rsp), %r14;
+ movq (STACK_MAX + 4 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
+
+ xorl %eax, %eax;
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks4,
+ .-_gcry_chacha20_poly1305_amd64_ssse3_blocks4;)
+
+/**********************************************************************
+ 2-way && 1-way stitched chacha20-poly1305
+ **********************************************************************/
+
+.align 8
+.globl _gcry_chacha20_poly1305_amd64_ssse3_blocks1
+ELF(.type _gcry_chacha20_poly1305_amd64_ssse3_blocks1,@function;)
+
+_gcry_chacha20_poly1305_amd64_ssse3_blocks1:
+ /* input:
+ * %rdi: chacha20-state
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: nblks
+ * %r9: poly1305-state
+ * %r8: poly1305-src
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ subq $(9 * 8), %rsp;
+ movq %rbx, (0 * 8)(%rsp);
+ movq %r12, (1 * 8)(%rsp);
+ movq %r13, (2 * 8)(%rsp);
+ movq %r14, (3 * 8)(%rsp);
+ movq %r15, (4 * 8)(%rsp);
+ CFI_REG_ON_STACK(rbx, 0 * 8);
+ CFI_REG_ON_STACK(r12, 1 * 8);
+ CFI_REG_ON_STACK(r13, 2 * 8);
+ CFI_REG_ON_STACK(r14, 3 * 8);
+ CFI_REG_ON_STACK(r15, 4 * 8);
+
+ movq %rdx, (5 * 8)(%rsp); # SRC
+ movq %rsi, (6 * 8)(%rsp); # DST
+ movq %rcx, (7 * 8)(%rsp); # NBLKS
+
+ /* Load constants */
+ movdqa .Lcounter1 rRIP, X4;
+ movdqa .Lshuf_rol8 rRIP, X5;
+ movdqa .Lshuf_rol16 rRIP, X6;
+
+ /* Load state */
+ movdqu (0 * 4)(INPUT), X10;
+ movdqu (4 * 4)(INPUT), X11;
+ movdqu (8 * 4)(INPUT), X12;
+ movdqu (12 * 4)(INPUT), X13;
+
+ POLY1305_LOAD_STATE();
+
+ cmpq $2, (7 * 8)(%rsp); #NBLKS
+ jb .Loop_poly1;
+
+ movdqa X10, X0;
+ movdqa X11, X1;
+ movdqa X12, X2;
+ movdqa X13, X3;
+
+ movdqa X10, X8;
+ movdqa X11, X9;
+ movdqa X12, X14;
+ movdqa X13, X15;
+ paddq X4, X15;
+
+ /* Process two ChaCha20 blocks and eight Poly1305 blocks. */
+
+ movl $20, (8 * 8 + 4)(%rsp);
+.Lround2_with_poly1305_outer:
+ movl $8, (8 * 8)(%rsp);
+.Lround2_with_poly1305_inner:
+ POLY1305_BLOCK_PART1(0 * 16);
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93);
+ lea (1 * 16)(POLY_RSRC), POLY_RSRC;
+ POLY1305_BLOCK_PART2();
+ QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x39, 0x4e, 0x93);
+ POLY1305_BLOCK_PART3();
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39);
+ POLY1305_BLOCK_PART4();
+ QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x93, 0x4e, 0x39);
+ POLY1305_BLOCK_PART5();
+
+ subl $2, (8 * 8)(%rsp);
+ jnz .Lround2_with_poly1305_inner;
+
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93);
+ QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x39, 0x4e, 0x93);
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39);
+ QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x93, 0x4e, 0x39);
+
+ subl $10, (8 * 8 + 4)(%rsp);
+ jnz .Lround2_with_poly1305_outer;
+
+ movq (5 * 8)(%rsp), SRC;
+ movq (6 * 8)(%rsp), DST;
+
+ PLUS(X0, X10);
+ PLUS(X1, X11);
+ PLUS(X2, X12);
+ PLUS(X3, X13);
+
+ /* Update counter */
+ paddq X4, X13;
+
+ PLUS(X8, X10);
+ PLUS(X9, X11);
+ PLUS(X14, X12);
+ PLUS(X15, X13);
+
+ /* Update counter */
+ paddq X4, X13;
+
+ xor_src_dst(DST, SRC, 0 * 4, X0, X7);
+ xor_src_dst(DST, SRC, 4 * 4, X1, X7);
+ xor_src_dst(DST, SRC, 8 * 4, X2, X7);
+ xor_src_dst(DST, SRC, 12 * 4, X3, X7);
+ xor_src_dst(DST, SRC, 16 * 4, X8, X7);
+ xor_src_dst(DST, SRC, 20 * 4, X9, X7);
+ xor_src_dst(DST, SRC, 24 * 4, X14, X7);
+ xor_src_dst(DST, SRC, 28 * 4, X15, X7);
+
+ clear(X8);
+ clear(X9);
+ clear(X14);
+ clear(X15);
+
+ subq $2, (7 * 8)(%rsp); # NBLKS
+ lea (2 * 64)(SRC), SRC;
+ lea (2 * 64)(DST), DST;
+ movq SRC, (5 * 8)(%rsp);
+ movq DST, (6 * 8)(%rsp);
+ jz .Ldone_poly1;
+
+.Loop_poly1:
+ movdqa X10, X0;
+ movdqa X11, X1;
+ movdqa X12, X2;
+ movdqa X13, X3;
+
+ /* Process one ChaCha20 block and four Poly1305 blocks. */
+
+ movl $20, (8 * 8 + 4)(%rsp);
+.Lround1_with_poly1305_outer:
+ movl $8, (8 * 8)(%rsp);
+.Lround1_with_poly1305_inner:
+ POLY1305_BLOCK_PART1(0 * 16);
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93);
+ POLY1305_BLOCK_PART2();
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39);
+ lea (1 * 16)(POLY_RSRC), POLY_RSRC;
+
+ POLY1305_BLOCK_PART3();
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93);
+ POLY1305_BLOCK_PART4();
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39);
+ POLY1305_BLOCK_PART5();
+
+ subl $4, (8 * 8)(%rsp);
+ jnz .Lround1_with_poly1305_inner;
+
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93);
+ QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39);
+
+ subl $10, (8 * 8 + 4)(%rsp);
+ jnz .Lround1_with_poly1305_outer;
+
+ movq (5 * 8)(%rsp), SRC;
+ movq (6 * 8)(%rsp), DST;
+
+ PLUS(X0, X10);
+ PLUS(X1, X11);
+ PLUS(X2, X12);
+ PLUS(X3, X13);
+
+ /* Update counter */
+ paddq X4, X13;
+
+ xor_src_dst(DST, SRC, 0 * 4, X0, X7);
+ xor_src_dst(DST, SRC, 4 * 4, X1, X7);
+ xor_src_dst(DST, SRC, 8 * 4, X2, X7);
+ xor_src_dst(DST, SRC, 12 * 4, X3, X7);
+
+ subq $1, (7 * 8)(%rsp); # NBLKS
+ lea (64)(SRC), SRC;
+ lea (64)(DST), DST;
+ movq SRC, (5 * 8)(%rsp);
+ movq DST, (6 * 8)(%rsp);
+
+ jnz .Loop_poly1;
+
+.Ldone_poly1:
+ /* Store state */
+ POLY1305_STORE_STATE();
+
+ movdqu X13, (12 * 4)(INPUT);
+
+ /* clear the used vector registers */
+ clear(X0);
+ clear(X1);
+ clear(X2);
+ clear(X3);
+ clear(X4);
+ clear(X5);
+ clear(X6);
+ clear(X7);
+ clear(X10);
+ clear(X11);
+ clear(X12);
+ clear(X13);
+
+ movq (0 * 8)(%rsp), %rbx;
+ movq (1 * 8)(%rsp), %r12;
+ movq (2 * 8)(%rsp), %r13;
+ movq (3 * 8)(%rsp), %r14;
+ movq (4 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
+
+ xorl %eax, %eax;
+ leave;
+ CFI_LEAVE();
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks1,
+ .-_gcry_chacha20_poly1305_amd64_ssse3_blocks1;)
+
+#endif /*defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/chacha20-armv7-neon.S b/comm/third_party/libgcrypt/cipher/chacha20-armv7-neon.S
new file mode 100644
index 0000000000..33a43df1f3
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/chacha20-armv7-neon.S
@@ -0,0 +1,393 @@
+/* chacha20-armv7-neon.S - ARMv7 NEON implementation of ChaCha20 cipher
+ *
+ * Copyright (C) 2017,2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Based on D. J. Bernstein reference implementation at
+ * http://cr.yp.to/chacha.html:
+ *
+ * chacha-regs.c version 20080118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_NEON)
+
+.syntax unified
+.fpu neon
+.arm
+
+.text
+
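+/* Load the address of a local data symbol: through the GOT when building
+ * position-independent code, otherwise via a literal-pool load. */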
+#ifdef __PIC__
+# define GET_DATA_POINTER(reg, name, rtmp) \
+ ldr reg, 1f; \
+ ldr rtmp, 2f; \
+ b 3f; \
+ 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+ 2: .word name(GOT); \
+ 3: add reg, pc, reg; \
+ ldr reg, [reg, rtmp];
+#else
+# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+/* register macros */
+#define INPUT r0
+#define DST r1
+#define SRC r2
+#define NBLKS r3
+#define ROUND r4
+
+/* stack structure */
+#define STACK_VEC_X12 (16)
+#define STACK_VEC_X13 (STACK_VEC_X12 + 16)
+#define STACK_TMP (STACK_VEC_X13 + 16)
+#define STACK_TMP1 (16 + STACK_TMP)
+#define STACK_TMP2 (16 + STACK_TMP1)
+
+#define STACK_MAX (16 + STACK_TMP2)
+
+/* vector registers */
+#define X0 q0
+#define X1 q1
+#define X2 q2
+#define X3 q3
+#define X4 q4
+#define X5 q5
+#define X6 q6
+#define X7 q7
+#define X8 q8
+#define X9 q9
+#define X10 q10
+#define X11 q11
+#define X12 q12
+#define X13 q13
+#define X14 q14
+#define X15 q15
+
+#define X0l d0
+#define X1l d2
+#define X2l d4
+#define X3l d6
+#define X4l d8
+#define X5l d10
+#define X6l d12
+#define X7l d14
+#define X8l d16
+#define X9l d18
+#define X10l d20
+#define X11l d22
+#define X12l d24
+#define X13l d26
+#define X14l d28
+#define X15l d30
+
+#define X0h d1
+#define X1h d3
+#define X2h d5
+#define X3h d7
+#define X4h d9
+#define X5h d11
+#define X6h d13
+#define X7h d15
+#define X8h d17
+#define X9h d19
+#define X10h d21
+#define X11h d23
+#define X12h d25
+#define X13h d27
+#define X14h d29
+#define X15h d31
+
+/**********************************************************************
+ helper macros
+ **********************************************************************/
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4_part1(_q0, _q1, _q2, _q3) \
+ vtrn.32 _q0, _q1; \
+ vtrn.32 _q2, _q3;
+#define transpose_4x4_part2(_q0, _q1, _q2, _q3) \
+ vswp _q0##h, _q2##l; \
+ vswp _q1##h, _q3##l;
+
+#define clear(x) veor x,x,x;
+
+/**********************************************************************
+ 4-way chacha20
+ **********************************************************************/
+
+#define ROTATE2(dst1,dst2,c,src1,src2) \
+ vshl.u32 dst1, src1, #(c); \
+ vshl.u32 dst2, src2, #(c); \
+ vsri.u32 dst1, src1, #(32 - (c)); \
+ vsri.u32 dst2, src2, #(32 - (c));
+
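+/* Rotation by 16 of a 32-bit word is just a halfword swap, so vrev32.16
+ * does it in one instruction instead of the shift/insert pair used above. */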
+#define ROTATE2_16(dst1,dst2,src1,src2) \
+ vrev32.16 dst1, src1; \
+ vrev32.16 dst2, src2;
+
+#define XOR(d,s1,s2) \
+ veor d, s2, s1;
+
+#define PLUS(ds,s) \
+ vadd.u32 ds, ds, s;
+
+#define QUARTERROUND2(a1,b1,c1,d1,a2,b2,c2,d2,ign,tmp1,tmp2) \
+ PLUS(a1,b1); PLUS(a2,b2); XOR(tmp1,d1,a1); XOR(tmp2,d2,a2); \
+ ROTATE2_16(d1, d2, tmp1, tmp2); \
+ PLUS(c1,d1); PLUS(c2,d2); XOR(tmp1,b1,c1); XOR(tmp2,b2,c2); \
+ ROTATE2(b1, b2, 12, tmp1, tmp2); \
+ PLUS(a1,b1); PLUS(a2,b2); XOR(tmp1,d1,a1); XOR(tmp2,d2,a2); \
+ ROTATE2(d1, d2, 8, tmp1, tmp2); \
+ PLUS(c1,d1); PLUS(c2,d2); XOR(tmp1,b1,c1); XOR(tmp2,b2,c2); \
+ ROTATE2(b1, b2, 7, tmp1, tmp2);
+
+chacha20_data:
+.align 4
+.Linc_counter:
+ .long 0,1,2,3
+
+.align 3
+.globl _gcry_chacha20_armv7_neon_blocks4
+.type _gcry_chacha20_armv7_neon_blocks4,%function;
+
+_gcry_chacha20_armv7_neon_blocks4:
+ /* input:
+ * r0: input
+ * r1: dst
+ * r2: src
+ * r3: nblks (multiple of 4)
+ */
+
+ vpush {q4-q7};
+ push {r4-r12,lr};
+
+ mov r12, sp
+
+ mov r6, sp;
+ sub r6, r6, #(STACK_MAX);
+ and r6, r6, #(~15);
+ mov sp, r6;
+ GET_DATA_POINTER(r9, .Linc_counter, lr);
+ add lr, INPUT, #(12*4);
+ add r8, sp, #STACK_VEC_X12;
+
+.Loop4:
+ mov ROUND, #20;
+
+ /* Construct counter vectors X12 and X13 */
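+ /* X12 holds the per-block low counter words (base + 0..3) and X13 the
+  * high words; vcgt.u32 flags the lanes where the 32-bit add wrapped so
+  * the carry is added into X13 by subtracting the all-ones mask. */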
+
+ vld1.8 {X15}, [lr];
+ mov lr, INPUT;
+ vld1.8 {X8}, [r9];
+ vdup.32 X12, X15l[0];
+ vdup.32 X13, X15l[1];
+ vld1.8 {X3}, [lr]!;
+ vadd.u32 X12, X12, X8;
+ vdup.32 X0, X3l[0];
+ vdup.32 X1, X3l[1];
+ vdup.32 X2, X3h[0];
+ vcgt.u32 X8, X8, X12;
+ vdup.32 X3, X3h[1];
+ vdup.32 X14, X15h[0];
+ vdup.32 X15, X15h[1];
+ vsub.u32 X13, X13, X8;
+ vld1.8 {X7}, [lr]!;
+ vld1.8 {X11}, [lr];
+ vst1.8 {X12, X13}, [r8];
+ vdup.32 X4, X7l[0];
+ vdup.32 X5, X7l[1];
+ vdup.32 X6, X7h[0];
+ vdup.32 X7, X7h[1];
+ vdup.32 X8, X11l[0];
+ vdup.32 X9, X11l[1];
+ vdup.32 X10, X11h[0];
+ vdup.32 X11, X11h[1];
+
+ add r7, sp, #STACK_TMP2;
+ add r6, sp, #STACK_TMP1;
+ add r5, sp, #STACK_TMP;
+ vst1.8 {X15}, [r6];
+ vst1.8 {X11}, [r5];
+
+ mov lr, INPUT;
+.Lround2:
+ subs ROUND, ROUND, #2
+ QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X11,X15)
+ vld1.8 {X11}, [r5];
+ vld1.8 {X15}, [r6];
+ vst1.8 {X8}, [r5];
+ vst1.8 {X9}, [r6];
+ QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8,X9)
+ QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8,X9)
+ vld1.8 {X8}, [r5];
+ vld1.8 {X9}, [r6];
+ vst1.8 {X11}, [r5];
+ vst1.8 {X15}, [r6];
+ QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X11,X15)
+ bne .Lround2;
+
+ vld1.8 {X11}, [lr]!;
+ vst1.8 {X14}, [r7];
+
+ vdup.32 X14, X11l[0]; /* INPUT + 0 * 4 */
+ vdup.32 X15, X11l[1]; /* INPUT + 1 * 4 */
+ PLUS(X0, X14);
+ PLUS(X1, X15);
+ vdup.32 X14, X11h[0]; /* INPUT + 2 * 4 */
+ vdup.32 X15, X11h[1]; /* INPUT + 3 * 4 */
+ PLUS(X2, X14);
+ PLUS(X3, X15);
+
+ vld1.8 {X11}, [r5];
+ vld1.8 {X15}, [r6];
+ vst1.8 {X0}, [r5];
+ vld1.8 {X0}, [lr]!;
+ vst1.8 {X1}, [r6];
+
+ vdup.32 X14, X0l[0]; /* INPUT + 4 * 4 */
+ vdup.32 X1, X0l[1]; /* INPUT + 5 * 4 */
+ PLUS(X4, X14);
+ PLUS(X5, X1);
+ vdup.32 X14, X0h[0]; /* INPUT + 6 * 4 */
+ vdup.32 X1, X0h[1]; /* INPUT + 7 * 4 */
+ PLUS(X6, X14);
+ PLUS(X7, X1);
+
+ vld1.8 {X0}, [lr]!;
+
+ vdup.32 X14, X0l[0]; /* INPUT + 8 * 4 */
+ vdup.32 X1, X0l[1]; /* INPUT + 9 * 4 */
+ PLUS(X8, X14);
+ PLUS(X9, X1);
+ vdup.32 X14, X0h[0]; /* INPUT + 10 * 4 */
+ vdup.32 X1, X0h[1]; /* INPUT + 11 * 4 */
+ PLUS(X10, X14);
+ PLUS(X11, X1);
+
+ vld1.8 {X0}, [lr];
+ add lr, INPUT, #(12*4)
+ vld1.8 {X14}, [r7];
+
+ vdup.32 X1, X0h[0]; /* INPUT + 10 * 4 */
+ ldm lr, {r10, r11}; /* Update counter */
+ vdup.32 X0, X0h[1]; /* INPUT + 11 * 4 */
+ PLUS(X14, X1);
+ PLUS(X15, X0);
+ adds r10, r10, #4; /* Update counter */
+ vld1.8 {X0, X1}, [r8];
+
+ PLUS(X12, X0);
+ vld1.8 {X0}, [r5];
+ PLUS(X13, X1);
+ adc r11, r11, #0; /* Update counter */
+
+ vld1.8 {X1}, [r6];
+ stm lr, {r10, r11}; /* Update counter */
+ transpose_4x4_part1(X0, X1, X2, X3);
+ transpose_4x4_part1(X4, X5, X6, X7);
+ transpose_4x4_part1(X8, X9, X10, X11);
+ transpose_4x4_part1(X12, X13, X14, X15);
+ transpose_4x4_part2(X0, X1, X2, X3);
+ transpose_4x4_part2(X4, X5, X6, X7);
+ transpose_4x4_part2(X8, X9, X10, X11);
+ transpose_4x4_part2(X12, X13, X14, X15);
+
+ subs NBLKS, NBLKS, #4;
+
+ vst1.8 {X10}, [r5];
+ add lr, INPUT, #(12*4)
+ vst1.8 {X11}, [r6];
+ vld1.8 {X10, X11}, [SRC]!;
+ veor X10, X0, X10;
+ vld1.8 {X0}, [SRC]!;
+ veor X11, X4, X11;
+ vld1.8 {X4}, [SRC]!;
+ vst1.8 {X10, X11}, [DST]!;
+ vld1.8 {X10, X11}, [SRC]!;
+ veor X0, X8, X0;
+ veor X4, X12, X4;
+ veor X10, X1, X10;
+ veor X11, X5, X11;
+ vst1.8 {X0}, [DST]!;
+ vld1.8 {X0, X1}, [SRC]!;
+ vst1.8 {X4}, [DST]!;
+ vld1.8 {X4, X5}, [SRC]!;
+ vst1.8 {X10, X11}, [DST]!;
+ vld1.8 {X10}, [r5];
+ vld1.8 {X11}, [r6];
+ veor X0, X9, X0;
+ vld1.8 {X8, X9}, [SRC]!;
+ veor X1, X13, X1;
+ vld1.8 {X12, X13}, [SRC]!;
+ veor X4, X2, X4;
+ veor X5, X6, X5;
+ vst1.8 {X0, X1}, [DST]!;
+ vld1.8 {X0, X1}, [SRC]!;
+ vst1.8 {X4, X5}, [DST]!;
+ veor X8, X10, X8;
+ veor X9, X14, X9;
+ veor X12, X3, X12;
+ veor X13, X7, X13;
+ veor X0, X11, X0;
+ veor X1, X15, X1;
+ vst1.8 {X8, X9}, [DST]!;
+ vst1.8 {X12, X13}, [DST]!;
+ vst1.8 {X0, X1}, [DST]!;
+
+ bne .Loop4;
+
+ /* clear the used vector registers and stack */
+ clear(X0);
+ vst1.8 {X0}, [r5];
+ vst1.8 {X0}, [r6];
+ vst1.8 {X0}, [r7];
+ vst1.8 {X0}, [r8]!;
+ vst1.8 {X0}, [r8];
+
+ mov sp, r12
+ clear(X1);
+ clear(X2);
+ clear(X3);
+ clear(X4);
+ clear(X5);
+ clear(X6);
+ clear(X7);
+ clear(X8);
+ clear(X9);
+ clear(X10);
+ clear(X11);
+ clear(X12);
+ clear(X13);
+ clear(X14);
+ clear(X15);
+
+ pop {r4-r12,lr}
+ vpop {q4-q7}
+ eor r0, r0, r0
+ bx lr
+.size _gcry_chacha20_armv7_neon_blocks4, .-_gcry_chacha20_armv7_neon_blocks4;
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/chacha20-ppc.c b/comm/third_party/libgcrypt/cipher/chacha20-ppc.c
new file mode 100644
index 0000000000..4a21b837d1
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/chacha20-ppc.c
@@ -0,0 +1,646 @@
+/* chacha20-ppc.c - PowerPC vector implementation of ChaCha20
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \
+ defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
+ defined(USE_CHACHA20) && \
+ __GNUC__ >= 4
+
+#include <altivec.h>
+#include "bufhelp.h"
+#include "poly1305-internal.h"
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+
+typedef vector unsigned char vector16x_u8;
+typedef vector unsigned int vector4x_u32;
+typedef vector unsigned long long vector2x_u64;
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+#ifdef WORDS_BIGENDIAN
+static const vector16x_u8 le_bswap_const =
+ { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+#endif
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_rol_elems(vector4x_u32 v, unsigned int idx)
+{
+#ifndef WORDS_BIGENDIAN
+ return vec_sld (v, v, (16 - (4 * idx)) & 15);
+#else
+ return vec_sld (v, v, (4 * idx) & 15);
+#endif
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_load_le(unsigned long offset, const unsigned char *ptr)
+{
+ vector4x_u32 vec;
+ vec = vec_vsx_ld (offset, (const u32 *)ptr);
+#ifdef WORDS_BIGENDIAN
+ vec = (vector4x_u32)vec_perm((vector16x_u8)vec, (vector16x_u8)vec,
+ le_bswap_const);
+#endif
+ return vec;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+vec_store_le(vector4x_u32 vec, unsigned long offset, unsigned char *ptr)
+{
+#ifdef WORDS_BIGENDIAN
+ vec = (vector4x_u32)vec_perm((vector16x_u8)vec, (vector16x_u8)vec,
+ le_bswap_const);
+#endif
+ vec_vsx_st (vec, offset, (u32 *)ptr);
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_add_ctr_u64(vector4x_u32 v, vector4x_u32 a)
+{
+#ifdef WORDS_BIGENDIAN
+ static const vector16x_u8 swap32 =
+ { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
+ vector2x_u64 vec, add, sum;
+
+ vec = (vector2x_u64)vec_perm((vector16x_u8)v, (vector16x_u8)v, swap32);
+ add = (vector2x_u64)vec_perm((vector16x_u8)a, (vector16x_u8)a, swap32);
+ sum = vec + add;
+ return (vector4x_u32)vec_perm((vector16x_u8)sum, (vector16x_u8)sum, swap32);
+#else
+ return (vector4x_u32)((vector2x_u64)(v) + (vector2x_u64)(a));
+#endif
+}
+
+
+/**********************************************************************
+ 2-way && 1-way chacha20
+ **********************************************************************/
+
+#define ROTATE(v1,rolv) \
+ __asm__ ("vrlw %0,%1,%2\n\t" : "=v" (v1) : "v" (v1), "v" (rolv))
+
+#define WORD_ROL(v1,c) \
+ ((v1) = vec_rol_elems((v1), (c)))
+
+#define XOR(ds,s) \
+ ((ds) ^= (s))
+
+#define PLUS(ds,s) \
+ ((ds) += (s))
+
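+/* In this 1-way/2-way code each vector register holds one row of the
+ * ChaCha20 state.  The WORD_ROL element rotations at the end of the
+ * quarter-round move the state between column and diagonal form: a call
+ * with (1, 2, 3) leaves the rows diagonalised and the following call with
+ * (3, 2, 1) restores the original order. */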
+#define QUARTERROUND4(x0,x1,x2,x3,rol_x1,rol_x2,rol_x3) \
+ PLUS(x0, x1); XOR(x3, x0); ROTATE(x3, rotate_16); \
+ PLUS(x2, x3); XOR(x1, x2); ROTATE(x1, rotate_12); \
+ PLUS(x0, x1); XOR(x3, x0); ROTATE(x3, rotate_8); \
+ PLUS(x2, x3); \
+ WORD_ROL(x3, rol_x3); \
+ XOR(x1, x2); \
+ WORD_ROL(x2, rol_x2); \
+ ROTATE(x1, rotate_7); \
+ WORD_ROL(x1, rol_x1);
+
+#define ADD_U64(v,a) \
+ (v = vec_add_ctr_u64(v, a))
+
+unsigned int ASM_FUNC_ATTR
+_gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, const byte *src,
+ size_t nblks)
+{
+ vector4x_u32 counter_1 = { 1, 0, 0, 0 };
+ vector4x_u32 rotate_16 = { 16, 16, 16, 16 };
+ vector4x_u32 rotate_12 = { 12, 12, 12, 12 };
+ vector4x_u32 rotate_8 = { 8, 8, 8, 8 };
+ vector4x_u32 rotate_7 = { 7, 7, 7, 7 };
+ vector4x_u32 state0, state1, state2, state3;
+ vector4x_u32 v0, v1, v2, v3;
+ vector4x_u32 v4, v5, v6, v7;
+ int i;
+
+ /* force preload of constants to vector registers */
+ __asm__ ("": "+v" (counter_1) :: "memory");
+ __asm__ ("": "+v" (rotate_16) :: "memory");
+ __asm__ ("": "+v" (rotate_12) :: "memory");
+ __asm__ ("": "+v" (rotate_8) :: "memory");
+ __asm__ ("": "+v" (rotate_7) :: "memory");
+
+ state0 = vec_vsx_ld(0 * 16, state);
+ state1 = vec_vsx_ld(1 * 16, state);
+ state2 = vec_vsx_ld(2 * 16, state);
+ state3 = vec_vsx_ld(3 * 16, state);
+
+ while (nblks >= 2)
+ {
+ v0 = state0;
+ v1 = state1;
+ v2 = state2;
+ v3 = state3;
+
+ v4 = state0;
+ v5 = state1;
+ v6 = state2;
+ v7 = state3;
+ ADD_U64(v7, counter_1);
+
+ for (i = 20; i > 0; i -= 2)
+ {
+ QUARTERROUND4(v0, v1, v2, v3, 1, 2, 3);
+ QUARTERROUND4(v4, v5, v6, v7, 1, 2, 3);
+ QUARTERROUND4(v0, v1, v2, v3, 3, 2, 1);
+ QUARTERROUND4(v4, v5, v6, v7, 3, 2, 1);
+ }
+
+ v0 += state0;
+ v1 += state1;
+ v2 += state2;
+ v3 += state3;
+ ADD_U64(state3, counter_1); /* update counter */
+ v4 += state0;
+ v5 += state1;
+ v6 += state2;
+ v7 += state3;
+ ADD_U64(state3, counter_1); /* update counter */
+
+ v0 ^= vec_load_le(0 * 16, src);
+ v1 ^= vec_load_le(1 * 16, src);
+ v2 ^= vec_load_le(2 * 16, src);
+ v3 ^= vec_load_le(3 * 16, src);
+ vec_store_le(v0, 0 * 16, dst);
+ vec_store_le(v1, 1 * 16, dst);
+ vec_store_le(v2, 2 * 16, dst);
+ vec_store_le(v3, 3 * 16, dst);
+ src += 64;
+ dst += 64;
+ v4 ^= vec_load_le(0 * 16, src);
+ v5 ^= vec_load_le(1 * 16, src);
+ v6 ^= vec_load_le(2 * 16, src);
+ v7 ^= vec_load_le(3 * 16, src);
+ vec_store_le(v4, 0 * 16, dst);
+ vec_store_le(v5, 1 * 16, dst);
+ vec_store_le(v6, 2 * 16, dst);
+ vec_store_le(v7, 3 * 16, dst);
+ src += 64;
+ dst += 64;
+
+ nblks -= 2;
+ }
+
+ while (nblks)
+ {
+ v0 = state0;
+ v1 = state1;
+ v2 = state2;
+ v3 = state3;
+
+ for (i = 20; i > 0; i -= 2)
+ {
+ QUARTERROUND4(v0, v1, v2, v3, 1, 2, 3);
+ QUARTERROUND4(v0, v1, v2, v3, 3, 2, 1);
+ }
+
+ v0 += state0;
+ v1 += state1;
+ v2 += state2;
+ v3 += state3;
+ ADD_U64(state3, counter_1); /* update counter */
+
+ v0 ^= vec_load_le(0 * 16, src);
+ v1 ^= vec_load_le(1 * 16, src);
+ v2 ^= vec_load_le(2 * 16, src);
+ v3 ^= vec_load_le(3 * 16, src);
+ vec_store_le(v0, 0 * 16, dst);
+ vec_store_le(v1, 1 * 16, dst);
+ vec_store_le(v2, 2 * 16, dst);
+ vec_store_le(v3, 3 * 16, dst);
+ src += 64;
+ dst += 64;
+
+ nblks--;
+ }
+
+ vec_vsx_st(state3, 3 * 16, state); /* store counter */
+
+ return 0;
+}
+
+
+/**********************************************************************
+ 4-way chacha20
+ **********************************************************************/
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4(x0, x1, x2, x3) ({ \
+ vector4x_u32 t1 = vec_mergeh(x0, x2); \
+ vector4x_u32 t2 = vec_mergel(x0, x2); \
+ vector4x_u32 t3 = vec_mergeh(x1, x3); \
+ x3 = vec_mergel(x1, x3); \
+ x0 = vec_mergeh(t1, t3); \
+ x1 = vec_mergel(t1, t3); \
+ x2 = vec_mergeh(t2, x3); \
+ x3 = vec_mergel(t2, x3); \
+ })
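+/* Used after the rounds of the 4-way code: going in, each vector holds the
+ * same state word for all four blocks; coming out, each vector holds four
+ * consecutive words of a single block, matching the 16-byte chunks that
+ * are XORed with src below. */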
+
+#define QUARTERROUND2(a1,b1,c1,d1,a2,b2,c2,d2) \
+ PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2); \
+ ROTATE(d1, rotate_16); ROTATE(d2, rotate_16); \
+ PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2); \
+ ROTATE(b1, rotate_12); ROTATE(b2, rotate_12); \
+ PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2); \
+ ROTATE(d1, rotate_8); ROTATE(d2, rotate_8); \
+ PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2); \
+ ROTATE(b1, rotate_7); ROTATE(b2, rotate_7);
+
+unsigned int ASM_FUNC_ATTR
+_gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst, const byte *src,
+ size_t nblks)
+{
+ vector4x_u32 counters_0123 = { 0, 1, 2, 3 };
+ vector4x_u32 counter_4 = { 4, 0, 0, 0 };
+ vector4x_u32 rotate_16 = { 16, 16, 16, 16 };
+ vector4x_u32 rotate_12 = { 12, 12, 12, 12 };
+ vector4x_u32 rotate_8 = { 8, 8, 8, 8 };
+ vector4x_u32 rotate_7 = { 7, 7, 7, 7 };
+ vector4x_u32 state0, state1, state2, state3;
+ vector4x_u32 v0, v1, v2, v3, v4, v5, v6, v7;
+ vector4x_u32 v8, v9, v10, v11, v12, v13, v14, v15;
+ vector4x_u32 tmp;
+ int i;
+
+ /* force preload of constants to vector registers */
+ __asm__ ("": "+v" (counters_0123) :: "memory");
+ __asm__ ("": "+v" (counter_4) :: "memory");
+ __asm__ ("": "+v" (rotate_16) :: "memory");
+ __asm__ ("": "+v" (rotate_12) :: "memory");
+ __asm__ ("": "+v" (rotate_8) :: "memory");
+ __asm__ ("": "+v" (rotate_7) :: "memory");
+
+ state0 = vec_vsx_ld(0 * 16, state);
+ state1 = vec_vsx_ld(1 * 16, state);
+ state2 = vec_vsx_ld(2 * 16, state);
+ state3 = vec_vsx_ld(3 * 16, state);
+
+ do
+ {
+ v0 = vec_splat(state0, 0);
+ v1 = vec_splat(state0, 1);
+ v2 = vec_splat(state0, 2);
+ v3 = vec_splat(state0, 3);
+ v4 = vec_splat(state1, 0);
+ v5 = vec_splat(state1, 1);
+ v6 = vec_splat(state1, 2);
+ v7 = vec_splat(state1, 3);
+ v8 = vec_splat(state2, 0);
+ v9 = vec_splat(state2, 1);
+ v10 = vec_splat(state2, 2);
+ v11 = vec_splat(state2, 3);
+ v12 = vec_splat(state3, 0);
+ v13 = vec_splat(state3, 1);
+ v14 = vec_splat(state3, 2);
+ v15 = vec_splat(state3, 3);
+
+ v12 += counters_0123;
+ v13 -= vec_cmplt(v12, counters_0123);
+
+ for (i = 20; i > 0; i -= 2)
+ {
+ QUARTERROUND2(v0, v4, v8, v12, v1, v5, v9, v13)
+ QUARTERROUND2(v2, v6, v10, v14, v3, v7, v11, v15)
+ QUARTERROUND2(v0, v5, v10, v15, v1, v6, v11, v12)
+ QUARTERROUND2(v2, v7, v8, v13, v3, v4, v9, v14)
+ }
+
+ v0 += vec_splat(state0, 0);
+ v1 += vec_splat(state0, 1);
+ v2 += vec_splat(state0, 2);
+ v3 += vec_splat(state0, 3);
+ v4 += vec_splat(state1, 0);
+ v5 += vec_splat(state1, 1);
+ v6 += vec_splat(state1, 2);
+ v7 += vec_splat(state1, 3);
+ v8 += vec_splat(state2, 0);
+ v9 += vec_splat(state2, 1);
+ v10 += vec_splat(state2, 2);
+ v11 += vec_splat(state2, 3);
+ tmp = vec_splat(state3, 0);
+ tmp += counters_0123;
+ v12 += tmp;
+ v13 += vec_splat(state3, 1) - vec_cmplt(tmp, counters_0123);
+ v14 += vec_splat(state3, 2);
+ v15 += vec_splat(state3, 3);
+ ADD_U64(state3, counter_4); /* update counter */
+
+ transpose_4x4(v0, v1, v2, v3);
+ transpose_4x4(v4, v5, v6, v7);
+ transpose_4x4(v8, v9, v10, v11);
+ transpose_4x4(v12, v13, v14, v15);
+
+ v0 ^= vec_load_le((64 * 0 + 16 * 0), src);
+ v1 ^= vec_load_le((64 * 1 + 16 * 0), src);
+ v2 ^= vec_load_le((64 * 2 + 16 * 0), src);
+ v3 ^= vec_load_le((64 * 3 + 16 * 0), src);
+
+ v4 ^= vec_load_le((64 * 0 + 16 * 1), src);
+ v5 ^= vec_load_le((64 * 1 + 16 * 1), src);
+ v6 ^= vec_load_le((64 * 2 + 16 * 1), src);
+ v7 ^= vec_load_le((64 * 3 + 16 * 1), src);
+
+ v8 ^= vec_load_le((64 * 0 + 16 * 2), src);
+ v9 ^= vec_load_le((64 * 1 + 16 * 2), src);
+ v10 ^= vec_load_le((64 * 2 + 16 * 2), src);
+ v11 ^= vec_load_le((64 * 3 + 16 * 2), src);
+
+ v12 ^= vec_load_le((64 * 0 + 16 * 3), src);
+ v13 ^= vec_load_le((64 * 1 + 16 * 3), src);
+ v14 ^= vec_load_le((64 * 2 + 16 * 3), src);
+ v15 ^= vec_load_le((64 * 3 + 16 * 3), src);
+
+ vec_store_le(v0, (64 * 0 + 16 * 0), dst);
+ vec_store_le(v1, (64 * 1 + 16 * 0), dst);
+ vec_store_le(v2, (64 * 2 + 16 * 0), dst);
+ vec_store_le(v3, (64 * 3 + 16 * 0), dst);
+
+ vec_store_le(v4, (64 * 0 + 16 * 1), dst);
+ vec_store_le(v5, (64 * 1 + 16 * 1), dst);
+ vec_store_le(v6, (64 * 2 + 16 * 1), dst);
+ vec_store_le(v7, (64 * 3 + 16 * 1), dst);
+
+ vec_store_le(v8, (64 * 0 + 16 * 2), dst);
+ vec_store_le(v9, (64 * 1 + 16 * 2), dst);
+ vec_store_le(v10, (64 * 2 + 16 * 2), dst);
+ vec_store_le(v11, (64 * 3 + 16 * 2), dst);
+
+ vec_store_le(v12, (64 * 0 + 16 * 3), dst);
+ vec_store_le(v13, (64 * 1 + 16 * 3), dst);
+ vec_store_le(v14, (64 * 2 + 16 * 3), dst);
+ vec_store_le(v15, (64 * 3 + 16 * 3), dst);
+
+ src += 4*64;
+ dst += 4*64;
+
+ nblks -= 4;
+ }
+ while (nblks);
+
+ vec_vsx_st(state3, 3 * 16, state); /* store counter */
+
+ return 0;
+}
+
+
+#if SIZEOF_UNSIGNED_LONG == 8
+
+/**********************************************************************
+ 4-way stitched chacha20-poly1305
+ **********************************************************************/
+
+#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \
+ __asm__ ("addc %0, %3, %0\n" \
+ "adde %1, %4, %1\n" \
+ "adde %2, %5, %2\n" \
+ : "+r" (A0), "+r" (A1), "+r" (A2) \
+ : "r" (B0), "r" (B1), "r" (B2) \
+ : "cc" )
+
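+/* h is kept as two 64-bit limbs plus a small top limb h2; products with
+ * weight 2^128 and above are folded back using 2^130 == 5 (mod 2^130-5).
+ * Poly1305 clamping zeroes the low two bits of r1, so r1_mult5 =
+ * (r1 >> 2) + r1 equals 5 * (r1 / 4) and implements that folding. */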
+#define MUL_MOD_1305_64_PART1(H2, H1, H0, R1, R0, R1_MULT5) do { \
+ /* x = a * r (partial mod 2^130-5) */ \
+ umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \
+ umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \
+ \
+ umul_ppmm(t0_hi, t0_lo, H1, R1_MULT5); /* h1 * r1 mod 2^130-5 */ \
+ } while (0)
+
+#define MUL_MOD_1305_64_PART2(H2, H1, H0, R1, R0, R1_MULT5) do { \
+ add_ssaaaa(x0_hi, x0_lo, x0_hi, x0_lo, t0_hi, t0_lo); \
+ umul_ppmm(t1_hi, t1_lo, H1, R0); /* h1 * r0 */ \
+ add_ssaaaa(x1_hi, x1_lo, x1_hi, x1_lo, t1_hi, t1_lo); \
+ \
+ t1_lo = H2 * R1_MULT5; /* h2 * r1 mod 2^130-5 */ \
+ t1_hi = H2 * R0; /* h2 * r0 */ \
+ add_ssaaaa(H0, H1, x1_hi, x1_lo, t1_hi, t1_lo); \
+ \
+ /* carry propagation */ \
+ H2 = H0 & 3; \
+ H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \
+ ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \
+ } while (0)
+
+#define POLY1305_BLOCK_PART1(in_pos) do { \
+ m0 = buf_get_le64(poly1305_src + (in_pos) + 0); \
+ m1 = buf_get_le64(poly1305_src + (in_pos) + 8); \
+ /* a = h + m */ \
+ ADD_1305_64(h2, h1, h0, m2, m1, m0); \
+ /* h = a * r (partial mod 2^130-5) */ \
+ MUL_MOD_1305_64_PART1(h2, h1, h0, r1, r0, r1_mult5); \
+ } while (0)
+
+#define POLY1305_BLOCK_PART2(in_pos) do { \
+ MUL_MOD_1305_64_PART2(h2, h1, h0, r1, r0, r1_mult5); \
+ } while (0)
+
+unsigned int ASM_FUNC_ATTR
+_gcry_chacha20_poly1305_ppc8_blocks4(u32 *state, byte *dst, const byte *src,
+ size_t nblks, POLY1305_STATE *st,
+ const byte *poly1305_src)
+{
+ vector4x_u32 counters_0123 = { 0, 1, 2, 3 };
+ vector4x_u32 counter_4 = { 4, 0, 0, 0 };
+ vector4x_u32 rotate_16 = { 16, 16, 16, 16 };
+ vector4x_u32 rotate_12 = { 12, 12, 12, 12 };
+ vector4x_u32 rotate_8 = { 8, 8, 8, 8 };
+ vector4x_u32 rotate_7 = { 7, 7, 7, 7 };
+ vector4x_u32 state0, state1, state2, state3;
+ vector4x_u32 v0, v1, v2, v3, v4, v5, v6, v7;
+ vector4x_u32 v8, v9, v10, v11, v12, v13, v14, v15;
+ vector4x_u32 tmp;
+ u64 r0, r1, r1_mult5;
+ u64 h0, h1, h2;
+ u64 m0, m1, m2;
+ u64 x0_lo, x0_hi, x1_lo, x1_hi;
+ u64 t0_lo, t0_hi, t1_lo, t1_hi;
+ unsigned int i, o;
+
+ /* load poly1305 state */
+ m2 = 1;
+ h0 = st->h[0] + ((u64)st->h[1] << 32);
+ h1 = st->h[2] + ((u64)st->h[3] << 32);
+ h2 = st->h[4];
+ r0 = st->r[0] + ((u64)st->r[1] << 32);
+ r1 = st->r[2] + ((u64)st->r[3] << 32);
+ r1_mult5 = (r1 >> 2) + r1;
+
+ /* force preload of constants to vector registers */
+ __asm__ ("": "+v" (counters_0123) :: "memory");
+ __asm__ ("": "+v" (counter_4) :: "memory");
+ __asm__ ("": "+v" (rotate_16) :: "memory");
+ __asm__ ("": "+v" (rotate_12) :: "memory");
+ __asm__ ("": "+v" (rotate_8) :: "memory");
+ __asm__ ("": "+v" (rotate_7) :: "memory");
+
+ state0 = vec_vsx_ld(0 * 16, state);
+ state1 = vec_vsx_ld(1 * 16, state);
+ state2 = vec_vsx_ld(2 * 16, state);
+ state3 = vec_vsx_ld(3 * 16, state);
+
+ do
+ {
+ v0 = vec_splat(state0, 0);
+ v1 = vec_splat(state0, 1);
+ v2 = vec_splat(state0, 2);
+ v3 = vec_splat(state0, 3);
+ v4 = vec_splat(state1, 0);
+ v5 = vec_splat(state1, 1);
+ v6 = vec_splat(state1, 2);
+ v7 = vec_splat(state1, 3);
+ v8 = vec_splat(state2, 0);
+ v9 = vec_splat(state2, 1);
+ v10 = vec_splat(state2, 2);
+ v11 = vec_splat(state2, 3);
+ v12 = vec_splat(state3, 0);
+ v13 = vec_splat(state3, 1);
+ v14 = vec_splat(state3, 2);
+ v15 = vec_splat(state3, 3);
+
+ v12 += counters_0123;
+ v13 -= vec_cmplt(v12, counters_0123);
+
+ for (o = 20; o; o -= 10)
+ {
+ for (i = 8; i; i -= 2)
+ {
+ POLY1305_BLOCK_PART1(0 * 16);
+ QUARTERROUND2(v0, v4, v8, v12, v1, v5, v9, v13)
+ POLY1305_BLOCK_PART2();
+ QUARTERROUND2(v2, v6, v10, v14, v3, v7, v11, v15)
+ POLY1305_BLOCK_PART1(1 * 16);
+ poly1305_src += 2 * 16;
+ QUARTERROUND2(v0, v5, v10, v15, v1, v6, v11, v12)
+ POLY1305_BLOCK_PART2();
+ QUARTERROUND2(v2, v7, v8, v13, v3, v4, v9, v14)
+ }
+
+ QUARTERROUND2(v0, v4, v8, v12, v1, v5, v9, v13)
+ QUARTERROUND2(v2, v6, v10, v14, v3, v7, v11, v15)
+ QUARTERROUND2(v0, v5, v10, v15, v1, v6, v11, v12)
+ QUARTERROUND2(v2, v7, v8, v13, v3, v4, v9, v14)
+ }
+
+ v0 += vec_splat(state0, 0);
+ v1 += vec_splat(state0, 1);
+ v2 += vec_splat(state0, 2);
+ v3 += vec_splat(state0, 3);
+ v4 += vec_splat(state1, 0);
+ v5 += vec_splat(state1, 1);
+ v6 += vec_splat(state1, 2);
+ v7 += vec_splat(state1, 3);
+ v8 += vec_splat(state2, 0);
+ v9 += vec_splat(state2, 1);
+ v10 += vec_splat(state2, 2);
+ v11 += vec_splat(state2, 3);
+ tmp = vec_splat(state3, 0);
+ tmp += counters_0123;
+ v12 += tmp;
+ v13 += vec_splat(state3, 1) - vec_cmplt(tmp, counters_0123);
+ v14 += vec_splat(state3, 2);
+ v15 += vec_splat(state3, 3);
+ ADD_U64(state3, counter_4); /* update counter */
+
+ transpose_4x4(v0, v1, v2, v3);
+ transpose_4x4(v4, v5, v6, v7);
+ transpose_4x4(v8, v9, v10, v11);
+ transpose_4x4(v12, v13, v14, v15);
+
+ v0 ^= vec_load_le((64 * 0 + 16 * 0), src);
+ v1 ^= vec_load_le((64 * 1 + 16 * 0), src);
+ v2 ^= vec_load_le((64 * 2 + 16 * 0), src);
+ v3 ^= vec_load_le((64 * 3 + 16 * 0), src);
+
+ v4 ^= vec_load_le((64 * 0 + 16 * 1), src);
+ v5 ^= vec_load_le((64 * 1 + 16 * 1), src);
+ v6 ^= vec_load_le((64 * 2 + 16 * 1), src);
+ v7 ^= vec_load_le((64 * 3 + 16 * 1), src);
+
+ v8 ^= vec_load_le((64 * 0 + 16 * 2), src);
+ v9 ^= vec_load_le((64 * 1 + 16 * 2), src);
+ v10 ^= vec_load_le((64 * 2 + 16 * 2), src);
+ v11 ^= vec_load_le((64 * 3 + 16 * 2), src);
+
+ v12 ^= vec_load_le((64 * 0 + 16 * 3), src);
+ v13 ^= vec_load_le((64 * 1 + 16 * 3), src);
+ v14 ^= vec_load_le((64 * 2 + 16 * 3), src);
+ v15 ^= vec_load_le((64 * 3 + 16 * 3), src);
+
+ vec_store_le(v0, (64 * 0 + 16 * 0), dst);
+ vec_store_le(v1, (64 * 1 + 16 * 0), dst);
+ vec_store_le(v2, (64 * 2 + 16 * 0), dst);
+ vec_store_le(v3, (64 * 3 + 16 * 0), dst);
+
+ vec_store_le(v4, (64 * 0 + 16 * 1), dst);
+ vec_store_le(v5, (64 * 1 + 16 * 1), dst);
+ vec_store_le(v6, (64 * 2 + 16 * 1), dst);
+ vec_store_le(v7, (64 * 3 + 16 * 1), dst);
+
+ vec_store_le(v8, (64 * 0 + 16 * 2), dst);
+ vec_store_le(v9, (64 * 1 + 16 * 2), dst);
+ vec_store_le(v10, (64 * 2 + 16 * 2), dst);
+ vec_store_le(v11, (64 * 3 + 16 * 2), dst);
+
+ vec_store_le(v12, (64 * 0 + 16 * 3), dst);
+ vec_store_le(v13, (64 * 1 + 16 * 3), dst);
+ vec_store_le(v14, (64 * 2 + 16 * 3), dst);
+ vec_store_le(v15, (64 * 3 + 16 * 3), dst);
+
+ src += 4*64;
+ dst += 4*64;
+
+ nblks -= 4;
+ }
+ while (nblks);
+
+ vec_vsx_st(state3, 3 * 16, state); /* store counter */
+
+ /* store poly1305 state */
+ st->h[0] = h0;
+ st->h[1] = h0 >> 32;
+ st->h[2] = h1;
+ st->h[3] = h1 >> 32;
+ st->h[4] = h2;
+
+ return 0;
+}
+
+#endif /* SIZEOF_UNSIGNED_LONG == 8 */
+
+#endif /* ENABLE_PPC_CRYPTO_SUPPORT */
diff --git a/comm/third_party/libgcrypt/cipher/chacha20-s390x.S b/comm/third_party/libgcrypt/cipher/chacha20-s390x.S
new file mode 100644
index 0000000000..9b1d59c6ad
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/chacha20-s390x.S
@@ -0,0 +1,1561 @@
+/* chacha20-s390x.S - zSeries implementation of ChaCha20 cipher
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_S390X_VX)
+
+#include "asm-common-s390x.h"
+#include "asm-poly1305-s390x.h"
+
+.machine "z13+vx"
+.text
+
+.balign 16
+.Lconsts:
+.Lwordswap:
+ .byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
+.Lbswap128:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.Lbswap32:
+ .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.Lone:
+ .long 0, 0, 0, 1
+.Ladd_counter_0123:
+ .long 0, 1, 2, 3
+.Ladd_counter_4567:
+ .long 4, 5, 6, 7
+
+/* register macros */
+#define INPUT %r2
+#define DST %r3
+#define SRC %r4
+#define NBLKS %r0
+#define ROUND %r1
+
+/* stack structure */
+
+#define STACK_FRAME_STD (8 * 16 + 8 * 4)
+#define STACK_FRAME_F8_F15 (8 * 8)
+#define STACK_FRAME_Y0_Y15 (16 * 16)
+#define STACK_FRAME_CTR (4 * 16)
+#define STACK_FRAME_PARAMS (6 * 8)
+
+#define STACK_MAX (STACK_FRAME_STD + STACK_FRAME_F8_F15 + \
+ STACK_FRAME_Y0_Y15 + STACK_FRAME_CTR + \
+ STACK_FRAME_PARAMS)
+
+#define STACK_F8 (STACK_MAX - STACK_FRAME_F8_F15)
+#define STACK_F9 (STACK_F8 + 8)
+#define STACK_F10 (STACK_F9 + 8)
+#define STACK_F11 (STACK_F10 + 8)
+#define STACK_F12 (STACK_F11 + 8)
+#define STACK_F13 (STACK_F12 + 8)
+#define STACK_F14 (STACK_F13 + 8)
+#define STACK_F15 (STACK_F14 + 8)
+#define STACK_Y0_Y15 (STACK_F8 - STACK_FRAME_Y0_Y15)
+#define STACK_CTR (STACK_Y0_Y15 - STACK_FRAME_CTR)
+#define STACK_INPUT (STACK_CTR - STACK_FRAME_PARAMS)
+#define STACK_DST (STACK_INPUT + 8)
+#define STACK_SRC (STACK_DST + 8)
+#define STACK_NBLKS (STACK_SRC + 8)
+#define STACK_POCTX (STACK_NBLKS + 8)
+#define STACK_POSRC (STACK_POCTX + 8)
+
+#define STACK_G0_H3 STACK_Y0_Y15
+
+/* vector registers */
+#define A0 %v0
+#define A1 %v1
+#define A2 %v2
+#define A3 %v3
+
+#define B0 %v4
+#define B1 %v5
+#define B2 %v6
+#define B3 %v7
+
+#define C0 %v8
+#define C1 %v9
+#define C2 %v10
+#define C3 %v11
+
+#define D0 %v12
+#define D1 %v13
+#define D2 %v14
+#define D3 %v15
+
+#define E0 %v16
+#define E1 %v17
+#define E2 %v18
+#define E3 %v19
+
+#define F0 %v20
+#define F1 %v21
+#define F2 %v22
+#define F3 %v23
+
+#define G0 %v24
+#define G1 %v25
+#define G2 %v26
+#define G3 %v27
+
+#define H0 %v28
+#define H1 %v29
+#define H2 %v30
+#define H3 %v31
+
+#define IO0 E0
+#define IO1 E1
+#define IO2 E2
+#define IO3 E3
+#define IO4 F0
+#define IO5 F1
+#define IO6 F2
+#define IO7 F3
+
+#define S0 G0
+#define S1 G1
+#define S2 G2
+#define S3 G3
+
+#define TMP0 H0
+#define TMP1 H1
+#define TMP2 H2
+#define TMP3 H3
+
+#define X0 A0
+#define X1 A1
+#define X2 A2
+#define X3 A3
+#define X4 B0
+#define X5 B1
+#define X6 B2
+#define X7 B3
+#define X8 C0
+#define X9 C1
+#define X10 C2
+#define X11 C3
+#define X12 D0
+#define X13 D1
+#define X14 D2
+#define X15 D3
+
+#define Y0 E0
+#define Y1 E1
+#define Y2 E2
+#define Y3 E3
+#define Y4 F0
+#define Y5 F1
+#define Y6 F2
+#define Y7 F3
+#define Y8 G0
+#define Y9 G1
+#define Y10 G2
+#define Y11 G3
+#define Y12 H0
+#define Y13 H1
+#define Y14 H2
+#define Y15 H3
+
+/**********************************************************************
+ helper macros
+ **********************************************************************/
+
+#define _ /*_*/
+
+#define CLEAR(x,...) vzero x;
+
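+/* START_STACK/END_STACK set up and tear down a 16-byte aligned stack frame
+ * below the caller's, saving %r6..last_r and the call-saved FPRs %f8-%f15;
+ * the caller's stack pointer is kept at 0(%r15) for END_STACK to restore. */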
+#define START_STACK(last_r) \
+ lgr %r0, %r15; \
+ lghi %r1, ~15; \
+ stmg %r6, last_r, 6 * 8(%r15); \
+ aghi %r0, -STACK_MAX; \
+ ngr %r0, %r1; \
+ lgr %r1, %r15; \
+ CFI_DEF_CFA_REGISTER(1); \
+ lgr %r15, %r0; \
+ stg %r1, 0(%r15); \
+ CFI_CFA_ON_STACK(0, 0); \
+ std %f8, STACK_F8(%r15); \
+ std %f9, STACK_F9(%r15); \
+ std %f10, STACK_F10(%r15); \
+ std %f11, STACK_F11(%r15); \
+ std %f12, STACK_F12(%r15); \
+ std %f13, STACK_F13(%r15); \
+ std %f14, STACK_F14(%r15); \
+ std %f15, STACK_F15(%r15);
+
+#define END_STACK(last_r) \
+ lg %r1, 0(%r15); \
+ ld %f8, STACK_F8(%r15); \
+ ld %f9, STACK_F9(%r15); \
+ ld %f10, STACK_F10(%r15); \
+ ld %f11, STACK_F11(%r15); \
+ ld %f12, STACK_F12(%r15); \
+ ld %f13, STACK_F13(%r15); \
+ ld %f14, STACK_F14(%r15); \
+ ld %f15, STACK_F15(%r15); \
+ lmg %r6, last_r, 6 * 8(%r1); \
+ lgr %r15, %r1; \
+ CFI_DEF_CFA_REGISTER(DW_REGNO_SP);
+
+#define PLUS(dst,src) \
+ vaf dst, dst, src;
+
+#define XOR(dst,src) \
+ vx dst, dst, src;
+
+#define ROTATE(v1,c) \
+ verllf v1, v1, (c)(0);
+
+#define WORD_ROTATE(v1,s) \
+ vsldb v1, v1, v1, ((s) * 4);
+
+#define DST_1(OPER, I, J) \
+ OPER(A##I, J);
+
+#define DST_2(OPER, I, J) \
+ OPER(A##I, J); OPER(B##I, J);
+
+#define DST_4(OPER, I, J) \
+ OPER(A##I, J); OPER(B##I, J); OPER(C##I, J); OPER(D##I, J);
+
+#define DST_8(OPER, I, J) \
+ OPER(A##I, J); OPER(B##I, J); OPER(C##I, J); OPER(D##I, J); \
+ OPER(E##I, J); OPER(F##I, J); OPER(G##I, J); OPER(H##I, J);
+
+#define DST_SRC_1(OPER, I, J) \
+ OPER(A##I, A##J);
+
+#define DST_SRC_2(OPER, I, J) \
+ OPER(A##I, A##J); OPER(B##I, B##J);
+
+#define DST_SRC_4(OPER, I, J) \
+ OPER(A##I, A##J); OPER(B##I, B##J); OPER(C##I, C##J); \
+ OPER(D##I, D##J);
+
+#define DST_SRC_8(OPER, I, J) \
+ OPER(A##I, A##J); OPER(B##I, B##J); OPER(C##I, C##J); \
+ OPER(D##I, D##J); OPER(E##I, E##J); OPER(F##I, F##J); \
+ OPER(G##I, G##J); OPER(H##I, H##J);
+
+/**********************************************************************
+ round macros
+ **********************************************************************/
+
+#define QUARTERROUND4_POLY(wrot_1,wrot_2,wrot_3,op1,op2) \
+ op1; DST_SRC_1(PLUS, 0, 1); DST_SRC_1(XOR, 3, 0); DST_1(ROTATE, 3, 16); \
+ DST_SRC_1(PLUS, 2, 3); DST_SRC_1(XOR, 1, 2); DST_1(ROTATE, 1, 12); \
+ DST_SRC_1(PLUS, 0, 1); DST_SRC_1(XOR, 3, 0); DST_1(ROTATE, 3, 8); \
+ op2; DST_SRC_1(PLUS, 2, 3); DST_SRC_1(XOR, 1, 2); DST_1(ROTATE, 1, 7); \
+ DST_1(WORD_ROTATE, 3, wrot_3); \
+ DST_1(WORD_ROTATE, 2, wrot_2); \
+ DST_1(WORD_ROTATE, 1, wrot_1);
+
+#define QUARTERROUND4(wrot_1,wrot_2,wrot_3) \
+ QUARTERROUND4_POLY(wrot_1,wrot_2,wrot_3,,)
+
+#define QUARTERROUND4_2_POLY(wrot_1,wrot_2,wrot_3,op1,op2,op3,op4) \
+ op1; DST_SRC_2(PLUS, 0, 1); DST_SRC_2(XOR, 3, 0); DST_2(ROTATE, 3, 16); \
+ DST_SRC_2(PLUS, 2, 3); op2; DST_SRC_2(XOR, 1, 2); DST_2(ROTATE, 1, 12); \
+ DST_SRC_2(PLUS, 0, 1); DST_SRC_2(XOR, 3, 0); op3; DST_2(ROTATE, 3, 8); \
+ DST_SRC_2(PLUS, 2, 3); DST_SRC_2(XOR, 1, 2); DST_2(ROTATE, 1, 7); op4; \
+ DST_2(WORD_ROTATE, 3, wrot_3); \
+ DST_2(WORD_ROTATE, 2, wrot_2); \
+ DST_2(WORD_ROTATE, 1, wrot_1);
+
+#define QUARTERROUND4_2(wrot_1,wrot_2,wrot_3) \
+ QUARTERROUND4_2_POLY(wrot_1,wrot_2,wrot_3,,,,)
+
+#define QUARTERROUND4_4_POLY(wrot_1,wrot_2,wrot_3,op1,op2,op3,op4,op5,op6) \
+ DST_SRC_4(PLUS, 0, 1); DST_SRC_4(XOR, 3, 0); op1; DST_4(ROTATE, 3, 16); \
+ DST_SRC_4(PLUS, 2, 3); op2; DST_SRC_4(XOR, 1, 2); DST_4(ROTATE, 1, 12); \
+ op3; DST_SRC_4(PLUS, 0, 1); DST_SRC_4(XOR, 3, 0); op4; DST_4(ROTATE, 3, 8); \
+ DST_SRC_4(PLUS, 2, 3); op5; DST_SRC_4(XOR, 1, 2); DST_4(ROTATE, 1, 7); \
+ op6; \
+ DST_4(WORD_ROTATE, 3, wrot_3); \
+ DST_4(WORD_ROTATE, 2, wrot_2); \
+ DST_4(WORD_ROTATE, 1, wrot_1);
+
+#define QUARTERROUND4_4(wrot_1,wrot_2,wrot_3) \
+ QUARTERROUND4_4_POLY(wrot_1,wrot_2,wrot_3,,,,,,)
+
+/**********************************************************************
+ 4-way && 2-way && 1-way chacha20 ("horizontal")
+ **********************************************************************/
+
+.balign 8
+.globl _gcry_chacha20_s390x_vx_blocks4_2_1
+ELF(.type _gcry_chacha20_s390x_vx_blocks4_2_1,@function;)
+
+_gcry_chacha20_s390x_vx_blocks4_2_1:
+ /* input:
+ * %r2: input
+ * %r3: dst
+ * %r4: src
+ * %r5: nblks
+ */
+ CFI_STARTPROC();
+
+ START_STACK(%r7);
+ lgr NBLKS, %r5;
+
+ /* Load constants. */
+ larl %r7, .Lconsts;
+ vl TMP0, (.Lwordswap - .Lconsts)(%r7);
+ vl TMP1, (.Lone - .Lconsts)(%r7);
+ vl TMP2, (.Lbswap128 - .Lconsts)(%r7);
+
+ /* Load state. */
+ vlm S0, S3, 0(INPUT);
+ vperm S0, S0, S0, TMP0;
+ vperm S1, S1, S1, TMP0;
+ vperm S2, S2, S2, TMP0;
+ vperm S3, S3, S3, TMP0;
+
+ clgijl NBLKS, 4, .Lloop2;
+
+.balign 4
+.Lloop4:
+ /* Process four chacha20 blocks. */
+ vlr TMP3, S3;
+ lghi ROUND, (20 / 2);
+ vlr A0, S0;
+ vlr A1, S1;
+ vlr A2, S2;
+ vlr A3, TMP3;
+ vag TMP3, TMP3, TMP1;
+ vlr B0, S0;
+ vlr B1, S1;
+ vlr B2, S2;
+ vlr B3, TMP3;
+ vag TMP3, TMP3, TMP1;
+ vlr C0, S0;
+ vlr C1, S1;
+ vlr C2, S2;
+ vlr C3, TMP3;
+ vlr D0, S0;
+ vlr D1, S1;
+ vlr D2, S2;
+ vag D3, TMP3, TMP1;
+
+ slgfi NBLKS, 4;
+
+.balign 4
+.Lround2_4:
+ QUARTERROUND4_4(3, 2, 1);
+ QUARTERROUND4_4(1, 2, 3);
+ brctg ROUND, .Lround2_4;
+
+ vlm IO0, IO7, 0(SRC);
+
+ PLUS(A0, S0);
+ PLUS(A1, S1);
+ PLUS(A2, S2);
+ PLUS(A3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+ PLUS(B0, S0);
+ PLUS(B1, S1);
+ PLUS(B2, S2);
+ PLUS(B3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+ vperm A0, A0, A0, TMP2;
+ vperm A1, A1, A1, TMP2;
+ vperm A2, A2, A2, TMP2;
+ vperm A3, A3, A3, TMP2;
+ vperm B0, B0, B0, TMP2;
+ vperm B1, B1, B1, TMP2;
+ vperm B2, B2, B2, TMP2;
+ vperm B3, B3, B3, TMP2;
+ PLUS(C0, S0);
+ PLUS(C1, S1);
+ PLUS(C2, S2);
+ PLUS(C3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+ PLUS(D0, S0);
+ PLUS(D1, S1);
+ PLUS(D2, S2);
+ PLUS(D3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+ vperm C0, C0, C0, TMP2;
+ vperm C1, C1, C1, TMP2;
+ vperm C2, C2, C2, TMP2;
+ vperm C3, C3, C3, TMP2;
+ vperm D0, D0, D0, TMP2;
+ vperm D1, D1, D1, TMP2;
+ vperm D2, D2, D2, TMP2;
+ vperm D3, D3, D3, TMP2;
+
+ XOR(IO0, A0);
+ XOR(IO1, A1);
+ XOR(IO2, A2);
+ XOR(IO3, A3);
+ XOR(IO4, B0);
+ XOR(IO5, B1);
+ XOR(IO6, B2);
+ XOR(IO7, B3);
+ vlm A0, B3, 128(SRC);
+ vstm IO0, IO7, 0(DST);
+ XOR(A0, C0);
+ XOR(A1, C1);
+ XOR(A2, C2);
+ XOR(A3, C3);
+ XOR(B0, D0);
+ XOR(B1, D1);
+ XOR(B2, D2);
+ XOR(B3, D3);
+ vstm A0, B3, 128(DST);
+
+ aghi SRC, 256;
+ aghi DST, 256;
+
+ clgijhe NBLKS, 4, .Lloop4;
+
+ CLEAR(C0);
+ CLEAR(C1);
+ CLEAR(C2);
+ CLEAR(C3);
+ CLEAR(D0);
+ CLEAR(D1);
+ CLEAR(D2);
+ CLEAR(D3);
+
+.balign 4
+.Lloop2:
+ clgijl NBLKS, 2, .Lloop1;
+
+ /* Process two chacha20 blocks. */
+ lghi ROUND, (20 / 2);
+ vlr A0, S0;
+ vlr A1, S1;
+ vlr A2, S2;
+ vlr A3, S3;
+ vlr B0, S0;
+ vlr B1, S1;
+ vlr B2, S2;
+ vag B3, S3, TMP1;
+
+ slgfi NBLKS, 2;
+
+.balign 4
+.Lround2_2:
+ QUARTERROUND4_2(3, 2, 1);
+ QUARTERROUND4_2(1, 2, 3);
+ brctg ROUND, .Lround2_2;
+
+ vlm IO0, IO7, 0(SRC);
+
+ PLUS(A0, S0);
+ PLUS(A1, S1);
+ PLUS(A2, S2);
+ PLUS(A3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+ PLUS(B0, S0);
+ PLUS(B1, S1);
+ PLUS(B2, S2);
+ PLUS(B3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+ vperm A0, A0, A0, TMP2;
+ vperm A1, A1, A1, TMP2;
+ vperm A2, A2, A2, TMP2;
+ vperm A3, A3, A3, TMP2;
+ vperm B0, B0, B0, TMP2;
+ vperm B1, B1, B1, TMP2;
+ vperm B2, B2, B2, TMP2;
+ vperm B3, B3, B3, TMP2;
+
+ XOR(IO0, A0);
+ XOR(IO1, A1);
+ XOR(IO2, A2);
+ XOR(IO3, A3);
+ XOR(IO4, B0);
+ XOR(IO5, B1);
+ XOR(IO6, B2);
+ XOR(IO7, B3);
+ vstm IO0, IO7, 0(DST);
+
+ aghi SRC, 128;
+ aghi DST, 128;
+
+ clgijhe NBLKS, 2, .Lloop2;
+
+ CLEAR(B0);
+ CLEAR(B1);
+ CLEAR(B2);
+ CLEAR(B3);
+
+.balign 4
+.Lloop1:
+ clgijl NBLKS, 1, .Ldone;
+
+ /* Process one chacha20 block.*/
+ lghi ROUND, (20 / 2);
+ vlr A0, S0;
+ vlr A1, S1;
+ vlr A2, S2;
+ vlr A3, S3;
+
+ slgfi NBLKS, 1;
+
+.balign 4
+.Lround2_1:
+ QUARTERROUND4(3, 2, 1);
+ QUARTERROUND4(1, 2, 3);
+ brct ROUND, .Lround2_1;
+
+ vlm IO0, IO3, 0(SRC);
+
+ PLUS(A0, S0);
+ PLUS(A1, S1);
+ PLUS(A2, S2);
+ PLUS(A3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+
+ vperm A0, A0, A0, TMP2;
+ vperm A1, A1, A1, TMP2;
+ vperm A2, A2, A2, TMP2;
+ vperm A3, A3, A3, TMP2;
+ XOR(IO0, A0);
+ XOR(IO1, A1);
+ XOR(IO2, A2);
+ XOR(IO3, A3);
+ vstm IO0, IO3, 0(DST);
+
+ aghi SRC, 64;
+ aghi DST, 64;
+
+ clgijhe NBLKS, 1, .Lloop1;
+
+.balign 4
+.Ldone:
+ /* Store counter. */
+ vperm S3, S3, S3, TMP0;
+ vst S3, (48)(INPUT);
+
+ /* Clear the used vector registers. */
+ CLEAR(A0);
+ CLEAR(A1);
+ CLEAR(A2);
+ CLEAR(A3);
+ CLEAR(IO0);
+ CLEAR(IO1);
+ CLEAR(IO2);
+ CLEAR(IO3);
+ CLEAR(IO4);
+ CLEAR(IO5);
+ CLEAR(IO6);
+ CLEAR(IO7);
+ CLEAR(TMP0);
+ CLEAR(TMP1);
+ CLEAR(TMP2);
+
+ END_STACK(%r7);
+ xgr %r2, %r2;
+ br %r14;
+ CFI_ENDPROC();
+ELF(.size _gcry_chacha20_s390x_vx_blocks4_2_1,
+ .-_gcry_chacha20_s390x_vx_blocks4_2_1;)
+
+/**********************************************************************
+ 4-way && 2-way && 1-way stitched chacha20-poly1305 ("horizontal")
+ **********************************************************************/
+
+.balign 8
+.globl _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1
+ELF(.type _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1,@function;)
+
+_gcry_chacha20_poly1305_s390x_vx_blocks4_2_1:
+ /* input:
+ * %r2: input
+ * %r3: dst
+ * %r4: src
+ * %r5: nblks
+ * %r6: poly1305 state
+ * 160(%r15): poly1305 src
+ */
+ CFI_STARTPROC();
+
+ START_STACK(%r14);
+ lgr NBLKS, %r5;
+
+ /* Load constants. */
+ larl %r8, .Lconsts;
+ vl TMP0, (.Lwordswap - .Lconsts)(%r8);
+ vl TMP1, (.Lone - .Lconsts)(%r8);
+ vl TMP2, (.Lbswap128 - .Lconsts)(%r8);
+
+ /* Load state. */
+ vlm S0, S3, 0(INPUT);
+ vperm S0, S0, S0, TMP0;
+ vperm S1, S1, S1, TMP0;
+ vperm S2, S2, S2, TMP0;
+ vperm S3, S3, S3, TMP0;
+
+ /* Store parameters to stack. */
+ stmg %r2, %r6, STACK_INPUT(%r15);
+
+ lgr POLY_RSTATE, %r6;
+ lgr NBLKS, %r5;
+
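+ /* The sixth argument (poly1305 src) is passed on the stack; 0(%r15)
+  * holds the caller's stack pointer saved by START_STACK, and the
+  * argument lives at offset 160 in that frame. */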
+ lg POLY_RSRC, 0(%r15);
+ lg POLY_RSRC, 160(POLY_RSRC);
+ stg POLY_RSRC, STACK_POSRC(%r15);
+
+ /* Load poly1305 state */
+ POLY1305_LOAD_STATE();
+
+ clgijl NBLKS, 4, .Lloop2_poly;
+
+.balign 4
+.Lloop4_poly:
+ /* Process four chacha20 blocks and 16 poly1305 blocks. */
+ vlr TMP3, S3;
+ lghi ROUND, (20 / 4);
+ vlr A0, S0;
+ vlr A1, S1;
+ vlr A2, S2;
+ vlr A3, TMP3;
+ vag TMP3, TMP3, TMP1;
+ vlr B0, S0;
+ vlr B1, S1;
+ vlr B2, S2;
+ vlr B3, TMP3;
+ vag TMP3, TMP3, TMP1;
+ vlr C0, S0;
+ vlr C1, S1;
+ vlr C2, S2;
+ vlr C3, TMP3;
+ vlr D0, S0;
+ vlr D1, S1;
+ vlr D2, S2;
+ vag D3, TMP3, TMP1;
+
+ slgfi NBLKS, 4;
+
+.balign 4
+.Lround4_4_poly:
+ /* Total 15 poly1305 blocks processed by this loop. */
+ QUARTERROUND4_4_POLY(3, 2, 1,
+ POLY1305_BLOCK_PART1(0 * 16),
+ POLY1305_BLOCK_PART2(),
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4(),
+ POLY1305_BLOCK_PART5(),
+ POLY1305_BLOCK_PART6());
+ QUARTERROUND4_4_POLY(1, 2, 3,
+ POLY1305_BLOCK_PART7(),
+ POLY1305_BLOCK_PART8(),
+ POLY1305_BLOCK_PART1(1 * 16),
+ POLY1305_BLOCK_PART2(),
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4());
+ QUARTERROUND4_4_POLY(3, 2, 1,
+ POLY1305_BLOCK_PART5(),
+ POLY1305_BLOCK_PART6(),
+ POLY1305_BLOCK_PART7(),
+ POLY1305_BLOCK_PART8(),
+ POLY1305_BLOCK_PART1(2 * 16);
+ INC_POLY1305_SRC(3 * 16),
+ POLY1305_BLOCK_PART2());
+ QUARTERROUND4_4_POLY(1, 2, 3,
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4(),
+ POLY1305_BLOCK_PART5(),
+ POLY1305_BLOCK_PART6(),
+ POLY1305_BLOCK_PART7(),
+ POLY1305_BLOCK_PART8());
+ brctg ROUND, .Lround4_4_poly;
+
+ POLY1305_BLOCK_PART1(0 * 16);
+ INC_POLY1305_SRC(1 * 16);
+ stg POLY_RSRC, STACK_POSRC(%r15);
+
+ lg %r14, STACK_SRC(%r15);
+ vlm IO0, IO7, 0(%r14);
+
+ PLUS(A0, S0);
+ PLUS(A1, S1);
+ PLUS(A2, S2);
+ PLUS(A3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+ POLY1305_BLOCK_PART2();
+ PLUS(B0, S0);
+ PLUS(B1, S1);
+ PLUS(B2, S2);
+ PLUS(B3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+ POLY1305_BLOCK_PART3();
+ vperm A0, A0, A0, TMP2;
+ vperm A1, A1, A1, TMP2;
+ vperm A2, A2, A2, TMP2;
+ vperm A3, A3, A3, TMP2;
+ vperm B0, B0, B0, TMP2;
+ vperm B1, B1, B1, TMP2;
+ vperm B2, B2, B2, TMP2;
+ vperm B3, B3, B3, TMP2;
+ POLY1305_BLOCK_PART4();
+ PLUS(C0, S0);
+ PLUS(C1, S1);
+ PLUS(C2, S2);
+ PLUS(C3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+ PLUS(D0, S0);
+ PLUS(D1, S1);
+ PLUS(D2, S2);
+ PLUS(D3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+ POLY1305_BLOCK_PART5();
+ vperm C0, C0, C0, TMP2;
+ vperm C1, C1, C1, TMP2;
+ vperm C2, C2, C2, TMP2;
+ vperm C3, C3, C3, TMP2;
+ vperm D0, D0, D0, TMP2;
+ vperm D1, D1, D1, TMP2;
+ vperm D2, D2, D2, TMP2;
+ vperm D3, D3, D3, TMP2;
+
+ POLY1305_BLOCK_PART6();
+ XOR(IO0, A0);
+ XOR(IO1, A1);
+ XOR(IO2, A2);
+ XOR(IO3, A3);
+ XOR(IO4, B0);
+ XOR(IO5, B1);
+ XOR(IO6, B2);
+ XOR(IO7, B3);
+ vlm A0, B3, 128(%r14);
+ aghi %r14, 256;
+ stg %r14, STACK_SRC(%r15);
+
+ lg %r14, STACK_DST(%r15);
+ POLY1305_BLOCK_PART7();
+ vstm IO0, IO7, 0(%r14);
+ XOR(A0, C0);
+ XOR(A1, C1);
+ XOR(A2, C2);
+ XOR(A3, C3);
+ XOR(B0, D0);
+ XOR(B1, D1);
+ XOR(B2, D2);
+ XOR(B3, D3);
+ POLY1305_BLOCK_PART8();
+ vstm A0, B3, 128(%r14);
+ aghi %r14, 256;
+ stg %r14, STACK_DST(%r15);
+
+ lg POLY_RSRC, STACK_POSRC(%r15);
+
+ clgijhe NBLKS, 4, .Lloop4_poly;
+
+ CLEAR(C0);
+ CLEAR(C1);
+ CLEAR(C2);
+ CLEAR(C3);
+ CLEAR(D0);
+ CLEAR(D1);
+ CLEAR(D2);
+ CLEAR(D3);
+
+.balign 4
+.Lloop2_poly:
+ clgijl NBLKS, 2, .Lloop1_poly;
+
+ /* Process two chacha20 and eight poly1305 blocks. */
+ lghi ROUND, ((20 - 4) / 2);
+ vlr A0, S0;
+ vlr A1, S1;
+ vlr A2, S2;
+ vlr A3, S3;
+ vlr B0, S0;
+ vlr B1, S1;
+ vlr B2, S2;
+ vag B3, S3, TMP1;
+
+ slgfi NBLKS, 2;
+
+.balign 4
+.Lround4_2_poly:
+ /* Total eight poly1305 blocks processed by this loop. */
+ QUARTERROUND4_2_POLY(3, 2, 1,
+ POLY1305_BLOCK_PART1(0 * 16),
+ POLY1305_BLOCK_PART2(),
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4());
+ INC_POLY1305_SRC(1 * 16);
+ QUARTERROUND4_2_POLY(1, 2, 3,
+ POLY1305_BLOCK_PART5(),
+ POLY1305_BLOCK_PART6(),
+ POLY1305_BLOCK_PART7(),
+ POLY1305_BLOCK_PART8());
+ brctg ROUND, .Lround4_2_poly;
+
+ stg POLY_RSRC, STACK_POSRC(%r15);
+ lg %r14, STACK_SRC(%r15);
+
+ QUARTERROUND4_2(3, 2, 1);
+ QUARTERROUND4_2(1, 2, 3);
+ QUARTERROUND4_2(3, 2, 1);
+ QUARTERROUND4_2(1, 2, 3);
+
+ vlm IO0, IO7, 0(%r14);
+ aghi %r14, 128;
+ stg %r14, STACK_SRC(%r15);
+
+ PLUS(A0, S0);
+ PLUS(A1, S1);
+ PLUS(A2, S2);
+ PLUS(A3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+ PLUS(B0, S0);
+ PLUS(B1, S1);
+ PLUS(B2, S2);
+ PLUS(B3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+ vperm A0, A0, A0, TMP2;
+ vperm A1, A1, A1, TMP2;
+ vperm A2, A2, A2, TMP2;
+ vperm A3, A3, A3, TMP2;
+ vperm B0, B0, B0, TMP2;
+ vperm B1, B1, B1, TMP2;
+ vperm B2, B2, B2, TMP2;
+ vperm B3, B3, B3, TMP2;
+
+ lg %r14, STACK_DST(%r15);
+ XOR(IO0, A0);
+ XOR(IO1, A1);
+ XOR(IO2, A2);
+ XOR(IO3, A3);
+ XOR(IO4, B0);
+ XOR(IO5, B1);
+ XOR(IO6, B2);
+ XOR(IO7, B3);
+ vstm IO0, IO7, 0(%r14);
+ aghi %r14, 128;
+ stg %r14, STACK_DST(%r15);
+
+ lg POLY_RSRC, STACK_POSRC(%r15);
+
+ clgijhe NBLKS, 2, .Lloop2_poly;
+
+ CLEAR(B0);
+ CLEAR(B1);
+ CLEAR(B2);
+ CLEAR(B3);
+
+.balign 4
+.Lloop1_poly:
+ clgijl NBLKS, 1, .Ldone_poly;
+
+ /* Process one chacha20 block and four poly1305 blocks.*/
+ lghi ROUND, ((20 - 4) / 4);
+ vlr A0, S0;
+ vlr A1, S1;
+ vlr A2, S2;
+ vlr A3, S3;
+
+ slgfi NBLKS, 1;
+
+.balign 4
+.Lround4_1_poly:
+ /* Total four poly1305 blocks processed by this loop. */
+ QUARTERROUND4_POLY(3, 2, 1,
+ POLY1305_BLOCK_PART1(0 * 16),
+ POLY1305_BLOCK_PART2());
+ INC_POLY1305_SRC(1 * 16);
+ QUARTERROUND4_POLY(1, 2, 3,
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4());
+ QUARTERROUND4_POLY(3, 2, 1,
+ POLY1305_BLOCK_PART5(),
+ POLY1305_BLOCK_PART6());
+ QUARTERROUND4_POLY(1, 2, 3,
+ POLY1305_BLOCK_PART7(),
+ POLY1305_BLOCK_PART8());
+ brct ROUND, .Lround4_1_poly;
+
+ stg POLY_RSRC, STACK_POSRC(%r15);
+ lg %r14, STACK_SRC(%r15);
+
+ QUARTERROUND4(3, 2, 1);
+ QUARTERROUND4(1, 2, 3);
+ QUARTERROUND4(3, 2, 1);
+ QUARTERROUND4(1, 2, 3);
+
+ vlm IO0, IO3, 0(%r14);
+ aghi %r14, 64;
+ stg %r14, STACK_SRC(%r15);
+
+ PLUS(A0, S0);
+ PLUS(A1, S1);
+ PLUS(A2, S2);
+ PLUS(A3, S3);
+ vag S3, S3, TMP1; /* Update counter. */
+
+ lg %r14, STACK_DST(%r15);
+ vperm A0, A0, A0, TMP2;
+ vperm A1, A1, A1, TMP2;
+ vperm A2, A2, A2, TMP2;
+ vperm A3, A3, A3, TMP2;
+ XOR(IO0, A0);
+ XOR(IO1, A1);
+ XOR(IO2, A2);
+ XOR(IO3, A3);
+ vstm IO0, IO3, 0(%r14);
+ aghi %r14, 64;
+ stg %r14, STACK_DST(%r15);
+
+ lg POLY_RSRC, STACK_POSRC(%r15);
+
+ clgijhe NBLKS, 1, .Lloop1_poly;
+
+.balign 4
+.Ldone_poly:
+ /* Store poly1305 state */
+ lg POLY_RSTATE, STACK_POCTX(%r15);
+ POLY1305_STORE_STATE();
+
+ /* Store counter. */
+ lg INPUT, STACK_INPUT(%r15);
+ vperm S3, S3, S3, TMP0;
+ vst S3, (48)(INPUT);
+
+ /* Clear the used vector registers. */
+ CLEAR(A0);
+ CLEAR(A1);
+ CLEAR(A2);
+ CLEAR(A3);
+ CLEAR(IO0);
+ CLEAR(IO1);
+ CLEAR(IO2);
+ CLEAR(IO3);
+ CLEAR(IO4);
+ CLEAR(IO5);
+ CLEAR(IO6);
+ CLEAR(IO7);
+ CLEAR(TMP0);
+ CLEAR(TMP1);
+ CLEAR(TMP2);
+
+ END_STACK(%r14);
+ xgr %r2, %r2;
+ br %r14;
+ CFI_ENDPROC();
+ELF(.size _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1,
+ .-_gcry_chacha20_poly1305_s390x_vx_blocks4_2_1;)
+
+/**********************************************************************
+ 8-way chacha20 ("vertical")
+ **********************************************************************/
+
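+/* "Vertical" processing: each vector register holds the same state word
+ * from four different blocks (X* for blocks 0-3, Y* for blocks 4-7), so a
+ * single pass over the round function advances eight blocks at once. */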
+#define QUARTERROUND4_V8_POLY(x0,x1,x2,x3,x4,x5,x6,x7,\
+ x8,x9,x10,x11,x12,x13,x14,x15,\
+ y0,y1,y2,y3,y4,y5,y6,y7,\
+ y8,y9,y10,y11,y12,y13,y14,y15,\
+ op1,op2,op3,op4,op5,op6,op7,op8,\
+ op9,op10,op11,op12) \
+ op1; \
+ PLUS(x0, x1); PLUS(x4, x5); \
+ PLUS(x8, x9); PLUS(x12, x13); \
+ PLUS(y0, y1); PLUS(y4, y5); \
+ PLUS(y8, y9); PLUS(y12, y13); \
+ op2; \
+ XOR(x3, x0); XOR(x7, x4); \
+ XOR(x11, x8); XOR(x15, x12); \
+ XOR(y3, y0); XOR(y7, y4); \
+ XOR(y11, y8); XOR(y15, y12); \
+ op3; \
+ ROTATE(x3, 16); ROTATE(x7, 16); \
+ ROTATE(x11, 16); ROTATE(x15, 16); \
+ ROTATE(y3, 16); ROTATE(y7, 16); \
+ ROTATE(y11, 16); ROTATE(y15, 16); \
+ op4; \
+ PLUS(x2, x3); PLUS(x6, x7); \
+ PLUS(x10, x11); PLUS(x14, x15); \
+ PLUS(y2, y3); PLUS(y6, y7); \
+ PLUS(y10, y11); PLUS(y14, y15); \
+ op5; \
+ XOR(x1, x2); XOR(x5, x6); \
+ XOR(x9, x10); XOR(x13, x14); \
+ XOR(y1, y2); XOR(y5, y6); \
+ XOR(y9, y10); XOR(y13, y14); \
+ op6; \
+ ROTATE(x1,12); ROTATE(x5,12); \
+ ROTATE(x9,12); ROTATE(x13,12); \
+ ROTATE(y1,12); ROTATE(y5,12); \
+ ROTATE(y9,12); ROTATE(y13,12); \
+ op7; \
+ PLUS(x0, x1); PLUS(x4, x5); \
+ PLUS(x8, x9); PLUS(x12, x13); \
+ PLUS(y0, y1); PLUS(y4, y5); \
+ PLUS(y8, y9); PLUS(y12, y13); \
+ op8; \
+ XOR(x3, x0); XOR(x7, x4); \
+ XOR(x11, x8); XOR(x15, x12); \
+ XOR(y3, y0); XOR(y7, y4); \
+ XOR(y11, y8); XOR(y15, y12); \
+ op9; \
+ ROTATE(x3,8); ROTATE(x7,8); \
+ ROTATE(x11,8); ROTATE(x15,8); \
+ ROTATE(y3,8); ROTATE(y7,8); \
+ ROTATE(y11,8); ROTATE(y15,8); \
+ op10; \
+ PLUS(x2, x3); PLUS(x6, x7); \
+ PLUS(x10, x11); PLUS(x14, x15); \
+ PLUS(y2, y3); PLUS(y6, y7); \
+ PLUS(y10, y11); PLUS(y14, y15); \
+ op11; \
+ XOR(x1, x2); XOR(x5, x6); \
+ XOR(x9, x10); XOR(x13, x14); \
+ XOR(y1, y2); XOR(y5, y6); \
+ XOR(y9, y10); XOR(y13, y14); \
+ op12; \
+ ROTATE(x1,7); ROTATE(x5,7); \
+ ROTATE(x9,7); ROTATE(x13,7); \
+ ROTATE(y1,7); ROTATE(y5,7); \
+ ROTATE(y9,7); ROTATE(y13,7);
+
+#define QUARTERROUND4_V8(x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,\
+ y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15) \
+ QUARTERROUND4_V8_POLY(x0,x1,x2,x3,x4,x5,x6,x7,\
+ x8,x9,x10,x11,x12,x13,x14,x15,\
+ y0,y1,y2,y3,y4,y5,y6,y7,\
+ y8,y9,y10,y11,y12,y13,y14,y15,\
+ ,,,,,,,,,,,)
+
+#define TRANSPOSE_4X4_2(v0,v1,v2,v3,va,vb,vc,vd,tmp0,tmp1,tmp2,tmpa,tmpb,tmpc) \
+ vmrhf tmp0, v0, v1; \
+ vmrhf tmp1, v2, v3; \
+ vmrlf tmp2, v0, v1; \
+ vmrlf v3, v2, v3; \
+ vmrhf tmpa, va, vb; \
+ vmrhf tmpb, vc, vd; \
+ vmrlf tmpc, va, vb; \
+ vmrlf vd, vc, vd; \
+ vpdi v0, tmp0, tmp1, 0; \
+ vpdi v1, tmp0, tmp1, 5; \
+ vpdi v2, tmp2, v3, 0; \
+ vpdi v3, tmp2, v3, 5; \
+ vpdi va, tmpa, tmpb, 0; \
+ vpdi vb, tmpa, tmpb, 5; \
+ vpdi vc, tmpc, vd, 0; \
+ vpdi vd, tmpc, vd, 5;
+
+.balign 8
+.globl _gcry_chacha20_s390x_vx_blocks8
+ELF(.type _gcry_chacha20_s390x_vx_blocks8,@function;)
+
+_gcry_chacha20_s390x_vx_blocks8:
+ /* input:
+ * %r2: input
+ * %r3: dst
+ * %r4: src
+ * %r5: nblks (multiple of 8)
+ */
+ CFI_STARTPROC();
+
+ START_STACK(%r8);
+ lgr NBLKS, %r5;
+
+ larl %r7, .Lconsts;
+
+ /* Load counter. */
+ lg %r8, (12 * 4)(INPUT);
+ rllg %r8, %r8, 32;
+
+.balign 4
+ /* Process eight chacha20 blocks per loop. */
+.Lloop8:
+ vlm Y0, Y3, 0(INPUT);
+
+ slgfi NBLKS, 8;
+ lghi ROUND, (20 / 2);
+
+ /* Construct counter vectors X12/X13 & Y12/Y13. */
+ vl X4, (.Ladd_counter_0123 - .Lconsts)(%r7);
+ vl Y4, (.Ladd_counter_4567 - .Lconsts)(%r7);
+ vrepf Y12, Y3, 0;
+ vrepf Y13, Y3, 1;
+ vaccf X5, Y12, X4;
+ vaccf Y5, Y12, Y4;
+ vaf X12, Y12, X4;
+ vaf Y12, Y12, Y4;
+ vaf X13, Y13, X5;
+ vaf Y13, Y13, Y5;
+
+ vrepf X0, Y0, 0;
+ vrepf X1, Y0, 1;
+ vrepf X2, Y0, 2;
+ vrepf X3, Y0, 3;
+ vrepf X4, Y1, 0;
+ vrepf X5, Y1, 1;
+ vrepf X6, Y1, 2;
+ vrepf X7, Y1, 3;
+ vrepf X8, Y2, 0;
+ vrepf X9, Y2, 1;
+ vrepf X10, Y2, 2;
+ vrepf X11, Y2, 3;
+ vrepf X14, Y3, 2;
+ vrepf X15, Y3, 3;
+
+ /* Store counters for blocks 0-7. */
+ vstm X12, X13, (STACK_CTR + 0 * 16)(%r15);
+ vstm Y12, Y13, (STACK_CTR + 2 * 16)(%r15);
+
+ vlr Y0, X0;
+ vlr Y1, X1;
+ vlr Y2, X2;
+ vlr Y3, X3;
+ vlr Y4, X4;
+ vlr Y5, X5;
+ vlr Y6, X6;
+ vlr Y7, X7;
+ vlr Y8, X8;
+ vlr Y9, X9;
+ vlr Y10, X10;
+ vlr Y11, X11;
+ vlr Y14, X14;
+ vlr Y15, X15;
+
+ /* Update and store counter. */
+ agfi %r8, 8;
+ rllg %r5, %r8, 32;
+ stg %r5, (12 * 4)(INPUT);
+
+.balign 4
+.Lround2_8:
+ QUARTERROUND4_V8(X0, X4, X8, X12, X1, X5, X9, X13,
+ X2, X6, X10, X14, X3, X7, X11, X15,
+ Y0, Y4, Y8, Y12, Y1, Y5, Y9, Y13,
+ Y2, Y6, Y10, Y14, Y3, Y7, Y11, Y15);
+ QUARTERROUND4_V8(X0, X5, X10, X15, X1, X6, X11, X12,
+ X2, X7, X8, X13, X3, X4, X9, X14,
+ Y0, Y5, Y10, Y15, Y1, Y6, Y11, Y12,
+ Y2, Y7, Y8, Y13, Y3, Y4, Y9, Y14);
+ brctg ROUND, .Lround2_8;
+
+ /* Store blocks 4-7. */
+ vstm Y0, Y15, STACK_Y0_Y15(%r15);
+
+ /* Load counters for blocks 0-3. */
+ vlm Y0, Y1, (STACK_CTR + 0 * 16)(%r15);
+
+ lghi ROUND, 1;
+ j .Lfirst_output_4blks_8;
+
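+/* Blocks 0-3 are finalized from the X registers first; ROUND is reused as a
+ * flag so that the same output code runs a second time for blocks 4-7, which
+ * were saved to the stack above. */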
+.balign 4
+.Lsecond_output_4blks_8:
+ /* Load blocks 4-7. */
+ vlm X0, X15, STACK_Y0_Y15(%r15);
+
+ /* Load counters for blocks 4-7. */
+ vlm Y0, Y1, (STACK_CTR + 2 * 16)(%r15);
+
+ lghi ROUND, 0;
+
+.balign 4
+ /* Output four chacha20 blocks per loop. */
+.Lfirst_output_4blks_8:
+ vlm Y12, Y15, 0(INPUT);
+ PLUS(X12, Y0);
+ PLUS(X13, Y1);
+ vrepf Y0, Y12, 0;
+ vrepf Y1, Y12, 1;
+ vrepf Y2, Y12, 2;
+ vrepf Y3, Y12, 3;
+ vrepf Y4, Y13, 0;
+ vrepf Y5, Y13, 1;
+ vrepf Y6, Y13, 2;
+ vrepf Y7, Y13, 3;
+ vrepf Y8, Y14, 0;
+ vrepf Y9, Y14, 1;
+ vrepf Y10, Y14, 2;
+ vrepf Y11, Y14, 3;
+ vrepf Y14, Y15, 2;
+ vrepf Y15, Y15, 3;
+ PLUS(X0, Y0);
+ PLUS(X1, Y1);
+ PLUS(X2, Y2);
+ PLUS(X3, Y3);
+ PLUS(X4, Y4);
+ PLUS(X5, Y5);
+ PLUS(X6, Y6);
+ PLUS(X7, Y7);
+ PLUS(X8, Y8);
+ PLUS(X9, Y9);
+ PLUS(X10, Y10);
+ PLUS(X11, Y11);
+ PLUS(X14, Y14);
+ PLUS(X15, Y15);
+
+ vl Y15, (.Lbswap32 - .Lconsts)(%r7);
+ TRANSPOSE_4X4_2(X0, X1, X2, X3, X4, X5, X6, X7,
+ Y9, Y10, Y11, Y12, Y13, Y14);
+ TRANSPOSE_4X4_2(X8, X9, X10, X11, X12, X13, X14, X15,
+ Y9, Y10, Y11, Y12, Y13, Y14);
+
+ vlm Y0, Y14, 0(SRC);
+ vperm X0, X0, X0, Y15;
+ vperm X1, X1, X1, Y15;
+ vperm X2, X2, X2, Y15;
+ vperm X3, X3, X3, Y15;
+ vperm X4, X4, X4, Y15;
+ vperm X5, X5, X5, Y15;
+ vperm X6, X6, X6, Y15;
+ vperm X7, X7, X7, Y15;
+ vperm X8, X8, X8, Y15;
+ vperm X9, X9, X9, Y15;
+ vperm X10, X10, X10, Y15;
+ vperm X11, X11, X11, Y15;
+ vperm X12, X12, X12, Y15;
+ vperm X13, X13, X13, Y15;
+ vperm X14, X14, X14, Y15;
+ vperm X15, X15, X15, Y15;
+ vl Y15, (15 * 16)(SRC);
+
+ XOR(Y0, X0);
+ XOR(Y1, X4);
+ XOR(Y2, X8);
+ XOR(Y3, X12);
+ XOR(Y4, X1);
+ XOR(Y5, X5);
+ XOR(Y6, X9);
+ XOR(Y7, X13);
+ XOR(Y8, X2);
+ XOR(Y9, X6);
+ XOR(Y10, X10);
+ XOR(Y11, X14);
+ XOR(Y12, X3);
+ XOR(Y13, X7);
+ XOR(Y14, X11);
+ XOR(Y15, X15);
+ vstm Y0, Y15, 0(DST);
+
+ aghi SRC, 256;
+ aghi DST, 256;
+
+ clgije ROUND, 1, .Lsecond_output_4blks_8;
+
+ clgijhe NBLKS, 8, .Lloop8;
+
+ /* Clear the used vector registers. */
+ DST_8(CLEAR, 0, _);
+ DST_8(CLEAR, 1, _);
+ DST_8(CLEAR, 2, _);
+ DST_8(CLEAR, 3, _);
+
+ /* Clear sensitive data in stack. */
+ vlm Y0, Y15, STACK_Y0_Y15(%r15);
+ vlm Y0, Y3, STACK_CTR(%r15);
+
+ END_STACK(%r8);
+ xgr %r2, %r2;
+ br %r14;
+ CFI_ENDPROC();
+ELF(.size _gcry_chacha20_s390x_vx_blocks8,
+ .-_gcry_chacha20_s390x_vx_blocks8;)
+
+/**********************************************************************
+ 8-way stitched chacha20-poly1305 ("vertical")
+ **********************************************************************/
+
+.balign 8
+.globl _gcry_chacha20_poly1305_s390x_vx_blocks8
+ELF(.type _gcry_chacha20_poly1305_s390x_vx_blocks8,@function;)
+
+_gcry_chacha20_poly1305_s390x_vx_blocks8:
+ /* input:
+ * %r2: input
+ * %r3: dst
+ * %r4: src
+ * %r5: nblks (multiple of 8)
+ * %r6: poly1305 state
+ * 160(%r15): poly1305 src
+ */
+ CFI_STARTPROC();
+
+ START_STACK(%r14);
+
+ /* Store parameters to stack. */
+ stmg %r2, %r6, STACK_INPUT(%r15);
+
+ lgr POLY_RSTATE, %r6;
+ lgr NBLKS, %r5;
+
+ lg POLY_RSRC, 0(%r15);
+ lg POLY_RSRC, 160(POLY_RSRC);
+ stg POLY_RSRC, STACK_POSRC(%r15);
+
+ /* Load poly1305 state */
+ POLY1305_LOAD_STATE();
+
+.balign 4
+ /* Process eight chacha20 blocks and 32 poly1305 blocks per loop. */
+.Lloop8_poly:
+ lg INPUT, STACK_INPUT(%r15);
+ larl %r8, .Lconsts;
+
+ vlm Y0, Y3, 0(INPUT);
+
+ slgfi NBLKS, 8;
+ lghi ROUND, (20 / 2);
+
+ /* Construct counter vectors X12/X13 & Y12/Y13. */
+ vl X4, (.Ladd_counter_0123 - .Lconsts)(%r8);
+ vl Y4, (.Ladd_counter_4567 - .Lconsts)(%r8);
+ lg %r8, (12 * 4)(INPUT); /* Update counter. */
+ vrepf Y12, Y3, 0;
+ vrepf Y13, Y3, 1;
+ vaccf X5, Y12, X4;
+ vaccf Y5, Y12, Y4;
+ vaf X12, Y12, X4;
+ vaf Y12, Y12, Y4;
+ vaf X13, Y13, X5;
+ vaf Y13, Y13, Y5;
+ rllg %r8, %r8, 32;
+
+ vrepf X0, Y0, 0;
+ vrepf X1, Y0, 1;
+ vrepf X2, Y0, 2;
+ vrepf X3, Y0, 3;
+ vrepf X4, Y1, 0;
+ vrepf X5, Y1, 1;
+ vrepf X6, Y1, 2;
+ vrepf X7, Y1, 3;
+ vrepf X8, Y2, 0;
+ vrepf X9, Y2, 1;
+ vrepf X10, Y2, 2;
+ vrepf X11, Y2, 3;
+ vrepf X14, Y3, 2;
+ vrepf X15, Y3, 3;
+ agfi %r8, 8;
+
+ /* Store counters for blocks 0-7. */
+ vstm X12, X13, (STACK_CTR + 0 * 16)(%r15);
+ vstm Y12, Y13, (STACK_CTR + 2 * 16)(%r15);
+ rllg %r8, %r8, 32;
+
+ vlr Y0, X0;
+ vlr Y1, X1;
+ vlr Y2, X2;
+ vlr Y3, X3;
+ vlr Y4, X4;
+ vlr Y5, X5;
+ vlr Y6, X6;
+ vlr Y7, X7;
+ vlr Y8, X8;
+ vlr Y9, X9;
+ vlr Y10, X10;
+ vlr Y11, X11;
+ vlr Y14, X14;
+ vlr Y15, X15;
+ stg %r8, (12 * 4)(INPUT);
+
+.balign 4
+.Lround2_8_poly:
+ /* Total 30 poly1305 blocks processed by this loop. */
+ QUARTERROUND4_V8_POLY(X0, X4, X8, X12, X1, X5, X9, X13,
+ X2, X6, X10, X14, X3, X7, X11, X15,
+ Y0, Y4, Y8, Y12, Y1, Y5, Y9, Y13,
+ Y2, Y6, Y10, Y14, Y3, Y7, Y11, Y15,
+ POLY1305_BLOCK_PART1(0 * 16),
+ POLY1305_BLOCK_PART2(),
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4(),
+ POLY1305_BLOCK_PART5(),
+ POLY1305_BLOCK_PART6(),
+ POLY1305_BLOCK_PART7(),
+ POLY1305_BLOCK_PART8(),
+ POLY1305_BLOCK_PART1(1 * 16),
+ POLY1305_BLOCK_PART2(),
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4());
+ QUARTERROUND4_V8_POLY(X0, X5, X10, X15, X1, X6, X11, X12,
+ X2, X7, X8, X13, X3, X4, X9, X14,
+ Y0, Y5, Y10, Y15, Y1, Y6, Y11, Y12,
+ Y2, Y7, Y8, Y13, Y3, Y4, Y9, Y14,
+ POLY1305_BLOCK_PART5(),
+ POLY1305_BLOCK_PART6(),
+ POLY1305_BLOCK_PART7(),
+ POLY1305_BLOCK_PART8(),
+ POLY1305_BLOCK_PART1(2 * 16);
+ INC_POLY1305_SRC(3 * 16),
+ POLY1305_BLOCK_PART2(),
+ POLY1305_BLOCK_PART3(),
+ POLY1305_BLOCK_PART4(),
+ POLY1305_BLOCK_PART5(),
+ POLY1305_BLOCK_PART6(),
+ POLY1305_BLOCK_PART7(),
+ POLY1305_BLOCK_PART8());
+ brctg ROUND, .Lround2_8_poly;
+
+ POLY1305_BLOCK_PART1(0 * 16);
+
+ /* Store blocks 4-7. */
+ vstm Y0, Y15, STACK_Y0_Y15(%r15);
+
+ /* Load counters for blocks 0-3. */
+ vlm Y0, Y1, (STACK_CTR + 0 * 16)(%r15);
+
+ stg POLY_RSRC, STACK_POSRC(%r15); /* %r14 used for INPUT/SRC/DST pointer. */
+
+ lghi ROUND, 1;
+ j .Lfirst_output_4blks_8_poly;
+
+.balign 4
+.Lsecond_output_4blks_8_poly:
+
+ POLY1305_BLOCK_PART1(1 * 16);
+
+ /* Load blocks 4-7. */
+ vlm X0, X15, STACK_Y0_Y15(%r15);
+
+ /* Load counters for blocks 4-7. */
+ vlm Y0, Y1, (STACK_CTR + 2 * 16)(%r15);
+
+ INC_POLY1305_SRC(2 * 16);
+ stg POLY_RSRC, STACK_POSRC(%r15); /* %r14 used for INPUT/SRC/DST pointer. */
+
+ lghi ROUND, 0;
+
+.balign 4
+ /* Output four chacha20 blocks and one poly1305 block per loop. */
+.Lfirst_output_4blks_8_poly:
+ lg %r14, STACK_INPUT(%r15);
+ vlm Y12, Y15, 0(%r14);
+ POLY1305_BLOCK_PART2();
+ PLUS(X12, Y0);
+ PLUS(X13, Y1);
+ vrepf Y0, Y12, 0;
+ vrepf Y1, Y12, 1;
+ vrepf Y2, Y12, 2;
+ vrepf Y3, Y12, 3;
+ vrepf Y4, Y13, 0;
+ vrepf Y5, Y13, 1;
+ vrepf Y6, Y13, 2;
+ vrepf Y7, Y13, 3;
+ vrepf Y8, Y14, 0;
+ vrepf Y9, Y14, 1;
+ vrepf Y10, Y14, 2;
+ vrepf Y11, Y14, 3;
+ vrepf Y14, Y15, 2;
+ vrepf Y15, Y15, 3;
+ POLY1305_BLOCK_PART3();
+ PLUS(X0, Y0);
+ PLUS(X1, Y1);
+ PLUS(X2, Y2);
+ PLUS(X3, Y3);
+ PLUS(X4, Y4);
+ PLUS(X5, Y5);
+ PLUS(X6, Y6);
+ PLUS(X7, Y7);
+ PLUS(X8, Y8);
+ PLUS(X9, Y9);
+ PLUS(X10, Y10);
+ PLUS(X11, Y11);
+ PLUS(X14, Y14);
+ PLUS(X15, Y15);
+ POLY1305_BLOCK_PART4();
+
+ larl %r14, .Lconsts;
+ vl Y15, (.Lbswap32 - .Lconsts)(%r14);
+ TRANSPOSE_4X4_2(X0, X1, X2, X3, X4, X5, X6, X7,
+ Y9, Y10, Y11, Y12, Y13, Y14);
+ lg %r14, STACK_SRC(%r15);
+ POLY1305_BLOCK_PART5();
+ TRANSPOSE_4X4_2(X8, X9, X10, X11, X12, X13, X14, X15,
+ Y9, Y10, Y11, Y12, Y13, Y14);
+
+ vlm Y0, Y14, 0(%r14);
+ POLY1305_BLOCK_PART6();
+ vperm X0, X0, X0, Y15;
+ vperm X1, X1, X1, Y15;
+ vperm X2, X2, X2, Y15;
+ vperm X3, X3, X3, Y15;
+ vperm X4, X4, X4, Y15;
+ vperm X5, X5, X5, Y15;
+ vperm X6, X6, X6, Y15;
+ vperm X7, X7, X7, Y15;
+ vperm X8, X8, X8, Y15;
+ vperm X9, X9, X9, Y15;
+ vperm X10, X10, X10, Y15;
+ vperm X11, X11, X11, Y15;
+ vperm X12, X12, X12, Y15;
+ vperm X13, X13, X13, Y15;
+ vperm X14, X14, X14, Y15;
+ vperm X15, X15, X15, Y15;
+ vl Y15, (15 * 16)(%r14);
+ POLY1305_BLOCK_PART7();
+
+ aghi %r14, 256;
+ stg %r14, STACK_SRC(%r15);
+ lg %r14, STACK_DST(%r15);
+
+ XOR(Y0, X0);
+ XOR(Y1, X4);
+ XOR(Y2, X8);
+ XOR(Y3, X12);
+ XOR(Y4, X1);
+ XOR(Y5, X5);
+ XOR(Y6, X9);
+ XOR(Y7, X13);
+ XOR(Y8, X2);
+ XOR(Y9, X6);
+ XOR(Y10, X10);
+ XOR(Y11, X14);
+ XOR(Y12, X3);
+ XOR(Y13, X7);
+ XOR(Y14, X11);
+ XOR(Y15, X15);
+ POLY1305_BLOCK_PART8();
+ vstm Y0, Y15, 0(%r14);
+
+ aghi %r14, 256;
+ stg %r14, STACK_DST(%r15);
+
+ lg POLY_RSRC, STACK_POSRC(%r15);
+
+ clgije ROUND, 1, .Lsecond_output_4blks_8_poly;
+
+ clgijhe NBLKS, 8, .Lloop8_poly;
+
+ /* Store poly1305 state */
+ lg POLY_RSTATE, STACK_POCTX(%r15);
+ POLY1305_STORE_STATE();
+
+ /* Clear the used vector registers */
+ DST_8(CLEAR, 0, _);
+ DST_8(CLEAR, 1, _);
+ DST_8(CLEAR, 2, _);
+ DST_8(CLEAR, 3, _);
+
+ /* Clear sensitive data in stack. */
+ vlm Y0, Y15, STACK_Y0_Y15(%r15);
+ vlm Y0, Y3, STACK_CTR(%r15);
+
+ END_STACK(%r14);
+ xgr %r2, %r2;
+ br %r14;
+ CFI_ENDPROC();
+ELF(.size _gcry_chacha20_poly1305_s390x_vx_blocks8,
+ .-_gcry_chacha20_poly1305_s390x_vx_blocks8;)
+
+#endif /*HAVE_GCC_INLINE_ASM_S390X_VX*/
+#endif /*__s390x__*/
diff --git a/comm/third_party/libgcrypt/cipher/chacha20.c b/comm/third_party/libgcrypt/cipher/chacha20.c
new file mode 100644
index 0000000000..497594a0bb
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/chacha20.c
@@ -0,0 +1,1306 @@
+/* chacha20.c - Bernstein's ChaCha20 cipher
+ * Copyright (C) 2014,2017-2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * For a description of the algorithm, see:
+ * http://cr.yp.to/chacha.html
+ */
+
+/*
+ * Based on D. J. Bernstein reference implementation at
+ * http://cr.yp.to/chacha.html:
+ *
+ * chacha-regs.c version 20080118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+
+#define CHACHA20_MIN_KEY_SIZE 16 /* Bytes. */
+#define CHACHA20_MAX_KEY_SIZE 32 /* Bytes. */
+#define CHACHA20_BLOCK_SIZE 64 /* Bytes. */
+#define CHACHA20_MIN_IV_SIZE 8 /* Bytes. */
+#define CHACHA20_MAX_IV_SIZE 12 /* Bytes. */
+#define CHACHA20_CTR_SIZE 16 /* Bytes. */
+
+
+/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
+#undef USE_SSSE3
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSSE3 1
+#endif
+
+/* USE_AVX2 indicates whether to compile with Intel AVX2 code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX2 1
+#endif
+
+/* USE_ARMV7_NEON indicates whether to enable ARMv7 NEON assembly code. */
+#undef USE_ARMV7_NEON
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_NEON)
+# define USE_ARMV7_NEON 1
+# endif
+#endif
+
+/* USE_AARCH64_SIMD indicates whether to enable ARMv8 SIMD assembly
+ * code. */
+#undef USE_AARCH64_SIMD
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(__AARCH64EL__) \
+ && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON)
+# define USE_AARCH64_SIMD 1
+# endif
+#endif
+
+/* USE_PPC_VEC indicates whether to enable PowerPC vector
+ * accelerated code. */
+#undef USE_PPC_VEC
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+# if __GNUC__ >= 4
+# define USE_PPC_VEC 1
+# endif
+# endif
+#endif
+
+/* USE_S390X_VX indicates whether to enable zSeries code. */
+#undef USE_S390X_VX
+#if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9
+# if defined(HAVE_GCC_INLINE_ASM_S390X_VX)
+# define USE_S390X_VX 1
+# endif /* USE_S390X_VX */
+#endif
+
+/* Assembly implementations use the SystemV ABI; on Win64 an ABI conversion
+ * and additional stack space to store XMM6-XMM15 are needed. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+#else
+# define ASM_FUNC_ABI
+#endif
+
+
+typedef struct CHACHA20_context_s
+{
+ u32 input[16];
+ unsigned char pad[CHACHA20_BLOCK_SIZE];
+ unsigned int unused; /* bytes in the pad. */
+ unsigned int use_ssse3:1;
+ unsigned int use_avx2:1;
+ unsigned int use_neon:1;
+ unsigned int use_ppc:1;
+ unsigned int use_s390x:1;
+} CHACHA20_context_t;
+
+
+#ifdef USE_SSSE3
+
+unsigned int _gcry_chacha20_amd64_ssse3_blocks4(u32 *state, byte *dst,
+ const byte *src,
+ size_t nblks) ASM_FUNC_ABI;
+
+unsigned int _gcry_chacha20_amd64_ssse3_blocks1(u32 *state, byte *dst,
+ const byte *src,
+ size_t nblks) ASM_FUNC_ABI;
+
+unsigned int _gcry_chacha20_poly1305_amd64_ssse3_blocks4(
+ u32 *state, byte *dst, const byte *src, size_t nblks,
+ void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI;
+
+unsigned int _gcry_chacha20_poly1305_amd64_ssse3_blocks1(
+ u32 *state, byte *dst, const byte *src, size_t nblks,
+ void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI;
+
+#endif /* USE_SSSE3 */
+
+#ifdef USE_AVX2
+
+unsigned int _gcry_chacha20_amd64_avx2_blocks8(u32 *state, byte *dst,
+ const byte *src,
+ size_t nblks) ASM_FUNC_ABI;
+
+unsigned int _gcry_chacha20_poly1305_amd64_avx2_blocks8(
+ u32 *state, byte *dst, const byte *src, size_t nblks,
+ void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI;
+
+#endif /* USE_AVX2 */
+
+#ifdef USE_PPC_VEC
+
+unsigned int _gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst,
+ const byte *src,
+ size_t nblks);
+
+unsigned int _gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst,
+ const byte *src,
+ size_t nblks);
+
+#undef USE_PPC_VEC_POLY1305
+#if SIZEOF_UNSIGNED_LONG == 8
+#define USE_PPC_VEC_POLY1305 1
+unsigned int _gcry_chacha20_poly1305_ppc8_blocks4(
+ u32 *state, byte *dst, const byte *src, size_t nblks,
+ POLY1305_STATE *st, const byte *poly1305_src);
+#endif /* SIZEOF_UNSIGNED_LONG == 8 */
+
+#endif /* USE_PPC_VEC */
+
+#ifdef USE_S390X_VX
+
+unsigned int _gcry_chacha20_s390x_vx_blocks8(u32 *state, byte *dst,
+ const byte *src, size_t nblks);
+
+unsigned int _gcry_chacha20_s390x_vx_blocks4_2_1(u32 *state, byte *dst,
+ const byte *src, size_t nblks);
+
+#undef USE_S390X_VX_POLY1305
+#if SIZEOF_UNSIGNED_LONG == 8
+#define USE_S390X_VX_POLY1305 1
+unsigned int _gcry_chacha20_poly1305_s390x_vx_blocks8(
+ u32 *state, byte *dst, const byte *src, size_t nblks,
+ POLY1305_STATE *st, const byte *poly1305_src);
+
+unsigned int _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1(
+ u32 *state, byte *dst, const byte *src, size_t nblks,
+ POLY1305_STATE *st, const byte *poly1305_src);
+#endif /* SIZEOF_UNSIGNED_LONG == 8 */
+
+#endif /* USE_S390X_VX */
+
+#ifdef USE_ARMV7_NEON
+
+unsigned int _gcry_chacha20_armv7_neon_blocks4(u32 *state, byte *dst,
+ const byte *src,
+ size_t nblks);
+
+#endif /* USE_ARMV7_NEON */
+
+#ifdef USE_AARCH64_SIMD
+
+unsigned int _gcry_chacha20_aarch64_blocks4(u32 *state, byte *dst,
+ const byte *src, size_t nblks);
+
+unsigned int _gcry_chacha20_poly1305_aarch64_blocks4(
+ u32 *state, byte *dst, const byte *src, size_t nblks,
+ void *poly1305_state, const byte *poly1305_src);
+
+#endif /* USE_AARCH64_SIMD */
+
+
+static const char *selftest (void);
+
+
+#define ROTATE(v,c) (rol(v,c))
+#define XOR(v,w) ((v) ^ (w))
+#define PLUS(v,w) ((u32)((v) + (w)))
+#define PLUSONE(v) (PLUS((v),1))
+
+#define QUARTERROUND(a,b,c,d) \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
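+
+/* One ChaCha20 quarter-round: four add/xor/rotate steps with rotations of
+ * 16, 12, 8 and 7 bits.  do_chacha20_blocks() below runs ten double rounds
+ * (four column rounds followed by four diagonal rounds) for 20 rounds. */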
+
+#define BUF_XOR_LE32(dst, src, offset, x) \
+ buf_put_le32((dst) + (offset), buf_get_le32((src) + (offset)) ^ (x))
+
+static unsigned int
+do_chacha20_blocks (u32 *input, byte *dst, const byte *src, size_t nblks)
+{
+ u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+ unsigned int i;
+
+ while (nblks)
+ {
+ x0 = input[0];
+ x1 = input[1];
+ x2 = input[2];
+ x3 = input[3];
+ x4 = input[4];
+ x5 = input[5];
+ x6 = input[6];
+ x7 = input[7];
+ x8 = input[8];
+ x9 = input[9];
+ x10 = input[10];
+ x11 = input[11];
+ x12 = input[12];
+ x13 = input[13];
+ x14 = input[14];
+ x15 = input[15];
+
+ for (i = 20; i > 0; i -= 2)
+ {
+ QUARTERROUND(x0, x4, x8, x12)
+ QUARTERROUND(x1, x5, x9, x13)
+ QUARTERROUND(x2, x6, x10, x14)
+ QUARTERROUND(x3, x7, x11, x15)
+ QUARTERROUND(x0, x5, x10, x15)
+ QUARTERROUND(x1, x6, x11, x12)
+ QUARTERROUND(x2, x7, x8, x13)
+ QUARTERROUND(x3, x4, x9, x14)
+ }
+
+ x0 = PLUS(x0, input[0]);
+ x1 = PLUS(x1, input[1]);
+ x2 = PLUS(x2, input[2]);
+ x3 = PLUS(x3, input[3]);
+ x4 = PLUS(x4, input[4]);
+ x5 = PLUS(x5, input[5]);
+ x6 = PLUS(x6, input[6]);
+ x7 = PLUS(x7, input[7]);
+ x8 = PLUS(x8, input[8]);
+ x9 = PLUS(x9, input[9]);
+ x10 = PLUS(x10, input[10]);
+ x11 = PLUS(x11, input[11]);
+ x12 = PLUS(x12, input[12]);
+ x13 = PLUS(x13, input[13]);
+ x14 = PLUS(x14, input[14]);
+ x15 = PLUS(x15, input[15]);
+
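+      /* Words 12 and 13 form the 64-bit block counter; the second line
+         propagates the carry when the low word wraps around to zero. */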
+ input[12] = PLUSONE(input[12]);
+ input[13] = PLUS(input[13], !input[12]);
+
+ BUF_XOR_LE32(dst, src, 0, x0);
+ BUF_XOR_LE32(dst, src, 4, x1);
+ BUF_XOR_LE32(dst, src, 8, x2);
+ BUF_XOR_LE32(dst, src, 12, x3);
+ BUF_XOR_LE32(dst, src, 16, x4);
+ BUF_XOR_LE32(dst, src, 20, x5);
+ BUF_XOR_LE32(dst, src, 24, x6);
+ BUF_XOR_LE32(dst, src, 28, x7);
+ BUF_XOR_LE32(dst, src, 32, x8);
+ BUF_XOR_LE32(dst, src, 36, x9);
+ BUF_XOR_LE32(dst, src, 40, x10);
+ BUF_XOR_LE32(dst, src, 44, x11);
+ BUF_XOR_LE32(dst, src, 48, x12);
+ BUF_XOR_LE32(dst, src, 52, x13);
+ BUF_XOR_LE32(dst, src, 56, x14);
+ BUF_XOR_LE32(dst, src, 60, x15);
+
+ src += CHACHA20_BLOCK_SIZE;
+ dst += CHACHA20_BLOCK_SIZE;
+ nblks--;
+ }
+
+ /* burn_stack */
+ return (17 * sizeof(u32) + 6 * sizeof(void *));
+}
+
+
+static unsigned int
+chacha20_blocks (CHACHA20_context_t *ctx, byte *dst, const byte *src,
+ size_t nblks)
+{
+#ifdef USE_SSSE3
+ if (ctx->use_ssse3)
+ {
+ return _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, dst, src, nblks);
+ }
+#endif
+
+#ifdef USE_PPC_VEC
+ if (ctx->use_ppc)
+ {
+ return _gcry_chacha20_ppc8_blocks1(ctx->input, dst, src, nblks);
+ }
+#endif
+
+#ifdef USE_S390X_VX
+ if (ctx->use_s390x)
+ {
+ return _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, dst, src, nblks);
+ }
+#endif
+
+ return do_chacha20_blocks (ctx->input, dst, src, nblks);
+}
+
+
+static void
+chacha20_keysetup (CHACHA20_context_t *ctx, const byte *key,
+ unsigned int keylen)
+{
+ static const char sigma[16] = "expand 32-byte k";
+ static const char tau[16] = "expand 16-byte k";
+ const char *constants;
+
+ ctx->input[4] = buf_get_le32(key + 0);
+ ctx->input[5] = buf_get_le32(key + 4);
+ ctx->input[6] = buf_get_le32(key + 8);
+ ctx->input[7] = buf_get_le32(key + 12);
+ if (keylen == CHACHA20_MAX_KEY_SIZE) /* 256 bits */
+ {
+ key += 16;
+ constants = sigma;
+ }
+ else /* 128 bits */
+ {
+ constants = tau;
+ }
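+  /* With a 128-bit key the same 16 key bytes are loaded again below, as the
+     key pointer is only advanced for 256-bit keys. */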
+ ctx->input[8] = buf_get_le32(key + 0);
+ ctx->input[9] = buf_get_le32(key + 4);
+ ctx->input[10] = buf_get_le32(key + 8);
+ ctx->input[11] = buf_get_le32(key + 12);
+ ctx->input[0] = buf_get_le32(constants + 0);
+ ctx->input[1] = buf_get_le32(constants + 4);
+ ctx->input[2] = buf_get_le32(constants + 8);
+ ctx->input[3] = buf_get_le32(constants + 12);
+}
+
+
+static void
+chacha20_ivsetup (CHACHA20_context_t * ctx, const byte *iv, size_t ivlen)
+{
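+  /* Accepted layouts: 16 bytes set the full counter block (64-bit counter
+     plus 64-bit nonce), 12 bytes set a 96-bit IETF nonce, 8 bytes set the
+     original 64-bit nonce; anything else clears counter and nonce. */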
+ if (ivlen == CHACHA20_CTR_SIZE)
+ {
+ ctx->input[12] = buf_get_le32 (iv + 0);
+ ctx->input[13] = buf_get_le32 (iv + 4);
+ ctx->input[14] = buf_get_le32 (iv + 8);
+ ctx->input[15] = buf_get_le32 (iv + 12);
+ }
+ else if (ivlen == CHACHA20_MAX_IV_SIZE)
+ {
+ ctx->input[12] = 0;
+ ctx->input[13] = buf_get_le32 (iv + 0);
+ ctx->input[14] = buf_get_le32 (iv + 4);
+ ctx->input[15] = buf_get_le32 (iv + 8);
+ }
+ else if (ivlen == CHACHA20_MIN_IV_SIZE)
+ {
+ ctx->input[12] = 0;
+ ctx->input[13] = 0;
+ ctx->input[14] = buf_get_le32 (iv + 0);
+ ctx->input[15] = buf_get_le32 (iv + 4);
+ }
+ else
+ {
+ ctx->input[12] = 0;
+ ctx->input[13] = 0;
+ ctx->input[14] = 0;
+ ctx->input[15] = 0;
+ }
+}
+
+
+static void
+chacha20_setiv (void *context, const byte *iv, size_t ivlen)
+{
+ CHACHA20_context_t *ctx = (CHACHA20_context_t *) context;
+
+  /* draft-nir-cfrg-chacha20-poly1305-02 defines 96-bit and 64-bit nonces. */
+ if (iv && ivlen != CHACHA20_MAX_IV_SIZE && ivlen != CHACHA20_MIN_IV_SIZE
+ && ivlen != CHACHA20_CTR_SIZE)
+ log_info ("WARNING: chacha20_setiv: bad ivlen=%u\n", (u32) ivlen);
+
+ if (iv && (ivlen == CHACHA20_MAX_IV_SIZE || ivlen == CHACHA20_MIN_IV_SIZE
+ || ivlen == CHACHA20_CTR_SIZE))
+ chacha20_ivsetup (ctx, iv, ivlen);
+ else
+ chacha20_ivsetup (ctx, NULL, 0);
+
+ /* Reset the unused pad bytes counter. */
+ ctx->unused = 0;
+}
+
+
+static gcry_err_code_t
+chacha20_do_setkey (CHACHA20_context_t *ctx,
+ const byte *key, unsigned int keylen)
+{
+ static int initialized;
+ static const char *selftest_failed;
+ unsigned int features = _gcry_get_hw_features ();
+
+ if (!initialized)
+ {
+ initialized = 1;
+ selftest_failed = selftest ();
+ if (selftest_failed)
+ log_error ("CHACHA20 selftest failed (%s)\n", selftest_failed);
+ }
+ if (selftest_failed)
+ return GPG_ERR_SELFTEST_FAILED;
+
+ if (keylen != CHACHA20_MAX_KEY_SIZE && keylen != CHACHA20_MIN_KEY_SIZE)
+ return GPG_ERR_INV_KEYLEN;
+
+#ifdef USE_SSSE3
+ ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+#endif
+#ifdef USE_AVX2
+ ctx->use_avx2 = (features & HWF_INTEL_AVX2) != 0;
+#endif
+#ifdef USE_ARMV7_NEON
+ ctx->use_neon = (features & HWF_ARM_NEON) != 0;
+#endif
+#ifdef USE_AARCH64_SIMD
+ ctx->use_neon = (features & HWF_ARM_NEON) != 0;
+#endif
+#ifdef USE_PPC_VEC
+ ctx->use_ppc = (features & HWF_PPC_ARCH_2_07) != 0;
+#endif
+#ifdef USE_S390X_VX
+ ctx->use_s390x = (features & HWF_S390X_VX) != 0;
+#endif
+
+ (void)features;
+
+ chacha20_keysetup (ctx, key, keylen);
+
+ /* We default to a zero nonce. */
+ chacha20_setiv (ctx, NULL, 0);
+
+ return 0;
+}
+
+
+static gcry_err_code_t
+chacha20_setkey (void *context, const byte *key, unsigned int keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ CHACHA20_context_t *ctx = (CHACHA20_context_t *) context;
+ gcry_err_code_t rc = chacha20_do_setkey (ctx, key, keylen);
+ (void)bulk_ops;
+ _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *));
+ return rc;
+}
+
+
+static unsigned int
+do_chacha20_encrypt_stream_tail (CHACHA20_context_t *ctx, byte *outbuf,
+ const byte *inbuf, size_t length)
+{
+ static const unsigned char zero_pad[CHACHA20_BLOCK_SIZE] = { 0, };
+ unsigned int nburn, burn = 0;
+
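+  /* Process as much as possible with the widest available implementation,
+     then hand the remainder to narrower ones and finally to the generic
+     block function. */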
+#ifdef USE_AVX2
+ if (ctx->use_avx2 && length >= CHACHA20_BLOCK_SIZE * 8)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 8;
+ nburn = _gcry_chacha20_amd64_avx2_blocks8(ctx->input, outbuf, inbuf,
+ nblocks);
+ burn = nburn > burn ? nburn : burn;
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
+#ifdef USE_SSSE3
+ if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 4)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 4;
+ nburn = _gcry_chacha20_amd64_ssse3_blocks4(ctx->input, outbuf, inbuf,
+ nblocks);
+ burn = nburn > burn ? nburn : burn;
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
+#ifdef USE_ARMV7_NEON
+ if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 4;
+ nburn = _gcry_chacha20_armv7_neon_blocks4(ctx->input, outbuf, inbuf,
+ nblocks);
+ burn = nburn > burn ? nburn : burn;
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
+#ifdef USE_AARCH64_SIMD
+ if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 4;
+ nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf,
+ nblocks);
+ burn = nburn > burn ? nburn : burn;
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
+#ifdef USE_PPC_VEC
+ if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 4;
+ nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, nblocks);
+ burn = nburn > burn ? nburn : burn;
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
+#ifdef USE_S390X_VX
+ if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 8)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 8;
+ nburn = _gcry_chacha20_s390x_vx_blocks8(ctx->input, outbuf, inbuf,
+ nblocks);
+ burn = nburn > burn ? nburn : burn;
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
+ if (length >= CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nburn = chacha20_blocks(ctx, outbuf, inbuf, nblocks);
+ burn = nburn > burn ? nburn : burn;
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+
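+  /* Partial final block: generate one block of keystream into ctx->pad and
+     remember how many unused keystream bytes remain for the next call. */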
+ if (length > 0)
+ {
+ nburn = chacha20_blocks(ctx, ctx->pad, zero_pad, 1);
+ burn = nburn > burn ? nburn : burn;
+
+ buf_xor (outbuf, inbuf, ctx->pad, length);
+ ctx->unused = CHACHA20_BLOCK_SIZE - length;
+ }
+
+ if (burn)
+ burn += 5 * sizeof(void *);
+
+ return burn;
+}
+
+
+static void
+chacha20_encrypt_stream (void *context, byte *outbuf, const byte *inbuf,
+ size_t length)
+{
+ CHACHA20_context_t *ctx = (CHACHA20_context_t *) context;
+ unsigned int nburn, burn = 0;
+
+ if (!length)
+ return;
+
+ if (ctx->unused)
+ {
+ unsigned char *p = ctx->pad;
+ size_t n;
+
+ gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE);
+
+ n = ctx->unused;
+ if (n > length)
+ n = length;
+
+ buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n);
+ length -= n;
+ outbuf += n;
+ inbuf += n;
+ ctx->unused -= n;
+
+ if (!length)
+ return;
+ gcry_assert (!ctx->unused);
+ }
+
+ nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, length);
+ burn = nburn > burn ? nburn : burn;
+
+ if (burn)
+ _gcry_burn_stack (burn);
+}
+
+
+gcry_err_code_t
+_gcry_chacha20_poly1305_encrypt(gcry_cipher_hd_t c, byte *outbuf,
+ const byte *inbuf, size_t length)
+{
+ CHACHA20_context_t *ctx = (void *) &c->context.c;
+ unsigned int nburn, burn = 0;
+ byte *authptr = NULL;
+
+ if (!length)
+ return 0;
+
+ if (ctx->unused)
+ {
+ unsigned char *p = ctx->pad;
+ size_t n;
+
+ gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE);
+
+ n = ctx->unused;
+ if (n > length)
+ n = length;
+
+ buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n);
+ nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, outbuf, n);
+ burn = nburn > burn ? nburn : burn;
+ length -= n;
+ outbuf += n;
+ inbuf += n;
+ ctx->unused -= n;
+
+ if (!length)
+ {
+ if (burn)
+ _gcry_burn_stack (burn);
+
+ return 0;
+ }
+ gcry_assert (!ctx->unused);
+ }
+
+ gcry_assert (c->u_mode.poly1305.ctx.leftover == 0);
+
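+  /* Encrypt the first chunk with a plain ChaCha20 implementation and keep
+     authptr pointing at it; the stitched ChaCha20-Poly1305 functions below
+     can then authenticate the previously produced ciphertext while they
+     encrypt the next chunk. */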
+ if (0)
+ { }
+#ifdef USE_AVX2
+ else if (ctx->use_avx2 && length >= CHACHA20_BLOCK_SIZE * 8)
+ {
+ nburn = _gcry_chacha20_amd64_avx2_blocks8(ctx->input, outbuf, inbuf, 8);
+ burn = nburn > burn ? nburn : burn;
+
+ authptr = outbuf;
+ length -= 8 * CHACHA20_BLOCK_SIZE;
+ outbuf += 8 * CHACHA20_BLOCK_SIZE;
+ inbuf += 8 * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+#ifdef USE_SSSE3
+ else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 4)
+ {
+ nburn = _gcry_chacha20_amd64_ssse3_blocks4(ctx->input, outbuf, inbuf, 4);
+ burn = nburn > burn ? nburn : burn;
+
+ authptr = outbuf;
+ length -= 4 * CHACHA20_BLOCK_SIZE;
+ outbuf += 4 * CHACHA20_BLOCK_SIZE;
+ inbuf += 4 * CHACHA20_BLOCK_SIZE;
+ }
+ else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 2)
+ {
+ nburn = _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, outbuf, inbuf, 2);
+ burn = nburn > burn ? nburn : burn;
+
+ authptr = outbuf;
+ length -= 2 * CHACHA20_BLOCK_SIZE;
+ outbuf += 2 * CHACHA20_BLOCK_SIZE;
+ inbuf += 2 * CHACHA20_BLOCK_SIZE;
+ }
+ else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE)
+ {
+ nburn = _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, outbuf, inbuf, 1);
+ burn = nburn > burn ? nburn : burn;
+
+ authptr = outbuf;
+ length -= 1 * CHACHA20_BLOCK_SIZE;
+ outbuf += 1 * CHACHA20_BLOCK_SIZE;
+ inbuf += 1 * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+#ifdef USE_AARCH64_SIMD
+ else if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4)
+ {
+ nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf, 4);
+ burn = nburn > burn ? nburn : burn;
+
+ authptr = outbuf;
+ length -= 4 * CHACHA20_BLOCK_SIZE;
+ outbuf += 4 * CHACHA20_BLOCK_SIZE;
+ inbuf += 4 * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+#ifdef USE_PPC_VEC_POLY1305
+ else if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4)
+ {
+ nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, 4);
+ burn = nburn > burn ? nburn : burn;
+
+ authptr = outbuf;
+ length -= 4 * CHACHA20_BLOCK_SIZE;
+ outbuf += 4 * CHACHA20_BLOCK_SIZE;
+ inbuf += 4 * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+#ifdef USE_S390X_VX_POLY1305
+ else if (ctx->use_s390x && length >= 2 * CHACHA20_BLOCK_SIZE * 8)
+ {
+ nburn = _gcry_chacha20_s390x_vx_blocks8(ctx->input, outbuf, inbuf, 8);
+ burn = nburn > burn ? nburn : burn;
+
+ authptr = outbuf;
+ length -= 8 * CHACHA20_BLOCK_SIZE;
+ outbuf += 8 * CHACHA20_BLOCK_SIZE;
+ inbuf += 8 * CHACHA20_BLOCK_SIZE;
+ }
+ else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 4)
+ {
+ nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 4);
+ burn = nburn > burn ? nburn : burn;
+
+ authptr = outbuf;
+ length -= 4 * CHACHA20_BLOCK_SIZE;
+ outbuf += 4 * CHACHA20_BLOCK_SIZE;
+ inbuf += 4 * CHACHA20_BLOCK_SIZE;
+ }
+ else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 2)
+ {
+ nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 2);
+ burn = nburn > burn ? nburn : burn;
+
+ authptr = outbuf;
+ length -= 2 * CHACHA20_BLOCK_SIZE;
+ outbuf += 2 * CHACHA20_BLOCK_SIZE;
+ inbuf += 2 * CHACHA20_BLOCK_SIZE;
+ }
+ else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE)
+ {
+ nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 1);
+ burn = nburn > burn ? nburn : burn;
+
+ authptr = outbuf;
+ length -= 1 * CHACHA20_BLOCK_SIZE;
+ outbuf += 1 * CHACHA20_BLOCK_SIZE;
+ inbuf += 1 * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
+ if (authptr)
+ {
+ size_t authoffset = outbuf - authptr;
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2 &&
+ length >= 8 * CHACHA20_BLOCK_SIZE &&
+ authoffset >= 8 * CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 8;
+
+ nburn = _gcry_chacha20_poly1305_amd64_avx2_blocks8(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, authptr);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ authptr += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
+#ifdef USE_SSSE3
+ if (ctx->use_ssse3)
+ {
+ if (length >= 4 * CHACHA20_BLOCK_SIZE &&
+ authoffset >= 4 * CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 4;
+
+ nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks4(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, authptr);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ authptr += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+
+ if (length >= CHACHA20_BLOCK_SIZE &&
+ authoffset >= CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+
+ nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks1(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, authptr);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ authptr += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+ }
+#endif
+
+#ifdef USE_AARCH64_SIMD
+ if (ctx->use_neon &&
+ length >= 4 * CHACHA20_BLOCK_SIZE &&
+ authoffset >= 4 * CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 4;
+
+ nburn = _gcry_chacha20_poly1305_aarch64_blocks4(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, authptr);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ authptr += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
+#ifdef USE_PPC_VEC_POLY1305
+ if (ctx->use_ppc &&
+ length >= 4 * CHACHA20_BLOCK_SIZE &&
+ authoffset >= 4 * CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 4;
+
+ nburn = _gcry_chacha20_poly1305_ppc8_blocks4(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, authptr);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ authptr += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
+#ifdef USE_S390X_VX_POLY1305
+ if (ctx->use_s390x)
+ {
+ if (length >= 8 * CHACHA20_BLOCK_SIZE &&
+ authoffset >= 8 * CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 8;
+
+          nburn = _gcry_chacha20_poly1305_s390x_vx_blocks8(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, authptr);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ authptr += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+
+ if (length >= CHACHA20_BLOCK_SIZE &&
+ authoffset >= CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+
+          nburn = _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, authptr);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ authptr += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+ }
+#endif
+
+ if (authoffset > 0)
+ {
+ _gcry_poly1305_update (&c->u_mode.poly1305.ctx, authptr, authoffset);
+ authptr += authoffset;
+ authoffset = 0;
+ }
+
+ gcry_assert(authptr == outbuf);
+ }
+
+ while (length)
+ {
+ size_t currlen = length;
+
+ /* Since checksumming is done after encryption, process input in 24KiB
+ * chunks to keep data loaded in L1 cache for checksumming. */
+ if (currlen > 24 * 1024)
+ currlen = 24 * 1024;
+
+ nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, currlen);
+ burn = nburn > burn ? nburn : burn;
+
+ nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, outbuf,
+ currlen);
+ burn = nburn > burn ? nburn : burn;
+
+ outbuf += currlen;
+ inbuf += currlen;
+ length -= currlen;
+ }
+
+ if (burn)
+ _gcry_burn_stack (burn);
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_chacha20_poly1305_decrypt(gcry_cipher_hd_t c, byte *outbuf,
+ const byte *inbuf, size_t length)
+{
+ CHACHA20_context_t *ctx = (void *) &c->context.c;
+ unsigned int nburn, burn = 0;
+
+ if (!length)
+ return 0;
+
+ if (ctx->unused)
+ {
+ unsigned char *p = ctx->pad;
+ size_t n;
+
+ gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE);
+
+ n = ctx->unused;
+ if (n > length)
+ n = length;
+
+ nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, inbuf, n);
+ burn = nburn > burn ? nburn : burn;
+ buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n);
+ length -= n;
+ outbuf += n;
+ inbuf += n;
+ ctx->unused -= n;
+
+ if (!length)
+ {
+ if (burn)
+ _gcry_burn_stack (burn);
+
+ return 0;
+ }
+ gcry_assert (!ctx->unused);
+ }
+
+ gcry_assert (c->u_mode.poly1305.ctx.leftover == 0);
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2 && length >= 8 * CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 8;
+
+ nburn = _gcry_chacha20_poly1305_amd64_avx2_blocks8(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, inbuf);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
+#ifdef USE_SSSE3
+ if (ctx->use_ssse3)
+ {
+ if (length >= 4 * CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 4;
+
+ nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks4(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, inbuf);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+
+ if (length >= CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+
+ nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks1(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, inbuf);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+ }
+#endif
+
+#ifdef USE_AARCH64_SIMD
+ if (ctx->use_neon && length >= 4 * CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 4;
+
+ nburn = _gcry_chacha20_poly1305_aarch64_blocks4(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, inbuf);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
+#ifdef USE_PPC_VEC_POLY1305
+ if (ctx->use_ppc && length >= 4 * CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 4;
+
+ nburn = _gcry_chacha20_poly1305_ppc8_blocks4(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, inbuf);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+#endif
+
+#ifdef USE_S390X_VX_POLY1305
+ if (ctx->use_s390x)
+ {
+ if (length >= 8 * CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+ nblocks -= nblocks % 8;
+
+ nburn = _gcry_chacha20_poly1305_s390x_vx_blocks8(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, inbuf);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+
+ if (length >= CHACHA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+
+ nburn = _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1(
+ ctx->input, outbuf, inbuf, nblocks,
+ &c->u_mode.poly1305.ctx.state, inbuf);
+ burn = nburn > burn ? nburn : burn;
+
+ length -= nblocks * CHACHA20_BLOCK_SIZE;
+ outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ inbuf += nblocks * CHACHA20_BLOCK_SIZE;
+ }
+ }
+#endif
+
+ while (length)
+ {
+ size_t currlen = length;
+
+ /* Since checksumming is done before decryption, process input in 24KiB
+ * chunks to keep data loaded in L1 cache for decryption. */
+ if (currlen > 24 * 1024)
+ currlen = 24 * 1024;
+
+ nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, inbuf,
+ currlen);
+ burn = nburn > burn ? nburn : burn;
+
+ nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, currlen);
+ burn = nburn > burn ? nburn : burn;
+
+ outbuf += currlen;
+ inbuf += currlen;
+ length -= currlen;
+ }
+
+ if (burn)
+ _gcry_burn_stack (burn);
+
+ return 0;
+}
+
+
+static const char *
+selftest (void)
+{
+ byte ctxbuf[sizeof(CHACHA20_context_t) + 15];
+ CHACHA20_context_t *ctx;
+ byte scratch[127 + 1];
+ byte buf[512 + 64 + 4];
+ int i;
+
+ /* From draft-strombergson-chacha-test-vectors */
+ static byte key_1[] = {
+ 0xc4, 0x6e, 0xc1, 0xb1, 0x8c, 0xe8, 0xa8, 0x78,
+ 0x72, 0x5a, 0x37, 0xe7, 0x80, 0xdf, 0xb7, 0x35,
+ 0x1f, 0x68, 0xed, 0x2e, 0x19, 0x4c, 0x79, 0xfb,
+ 0xc6, 0xae, 0xbe, 0xe1, 0xa6, 0x67, 0x97, 0x5d
+ };
+ static const byte nonce_1[] =
+ { 0x1a, 0xda, 0x31, 0xd5, 0xcf, 0x68, 0x82, 0x21 };
+ static const byte plaintext_1[127] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ };
+ static const byte ciphertext_1[127] = {
+ 0xf6, 0x3a, 0x89, 0xb7, 0x5c, 0x22, 0x71, 0xf9,
+ 0x36, 0x88, 0x16, 0x54, 0x2b, 0xa5, 0x2f, 0x06,
+ 0xed, 0x49, 0x24, 0x17, 0x92, 0x30, 0x2b, 0x00,
+ 0xb5, 0xe8, 0xf8, 0x0a, 0xe9, 0xa4, 0x73, 0xaf,
+ 0xc2, 0x5b, 0x21, 0x8f, 0x51, 0x9a, 0xf0, 0xfd,
+ 0xd4, 0x06, 0x36, 0x2e, 0x8d, 0x69, 0xde, 0x7f,
+ 0x54, 0xc6, 0x04, 0xa6, 0xe0, 0x0f, 0x35, 0x3f,
+ 0x11, 0x0f, 0x77, 0x1b, 0xdc, 0xa8, 0xab, 0x92,
+ 0xe5, 0xfb, 0xc3, 0x4e, 0x60, 0xa1, 0xd9, 0xa9,
+ 0xdb, 0x17, 0x34, 0x5b, 0x0a, 0x40, 0x27, 0x36,
+ 0x85, 0x3b, 0xf9, 0x10, 0xb0, 0x60, 0xbd, 0xf1,
+ 0xf8, 0x97, 0xb6, 0x29, 0x0f, 0x01, 0xd1, 0x38,
+ 0xae, 0x2c, 0x4c, 0x90, 0x22, 0x5b, 0xa9, 0xea,
+ 0x14, 0xd5, 0x18, 0xf5, 0x59, 0x29, 0xde, 0xa0,
+ 0x98, 0xca, 0x7a, 0x6c, 0xcf, 0xe6, 0x12, 0x27,
+ 0x05, 0x3c, 0x84, 0xe4, 0x9a, 0x4a, 0x33
+ };
+
+ /* 16-byte alignment required for amd64 implementation. */
+ ctx = (CHACHA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15);
+
+ chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
+ chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
+ scratch[sizeof (scratch) - 1] = 0;
+ chacha20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1);
+ if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1))
+ return "ChaCha20 encryption test 1 failed.";
+ if (scratch[sizeof (scratch) - 1])
+ return "ChaCha20 wrote too much.";
+ chacha20_setkey (ctx, key_1, sizeof (key_1), NULL);
+ chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
+ chacha20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1);
+ if (memcmp (scratch, plaintext_1, sizeof plaintext_1))
+ return "ChaCha20 decryption test 1 failed.";
+
+ for (i = 0; i < sizeof buf; i++)
+ buf[i] = i;
+ chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
+ chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
+  /* encrypt */
+ chacha20_encrypt_stream (ctx, buf, buf, sizeof buf);
+  /* decrypt */
+ chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
+ chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
+ chacha20_encrypt_stream (ctx, buf, buf, 1);
+ chacha20_encrypt_stream (ctx, buf + 1, buf + 1, (sizeof buf) - 1 - 1);
+ chacha20_encrypt_stream (ctx, buf + (sizeof buf) - 1,
+ buf + (sizeof buf) - 1, 1);
+ for (i = 0; i < sizeof buf; i++)
+ if (buf[i] != (byte) i)
+ return "ChaCha20 encryption test 2 failed.";
+
+ chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
+ chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
+ /* encrypt */
+ for (i = 0; i < sizeof buf; i++)
+ chacha20_encrypt_stream (ctx, &buf[i], &buf[i], 1);
+ /* decrypt */
+ chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
+ chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
+ chacha20_encrypt_stream (ctx, buf, buf, sizeof buf);
+ for (i = 0; i < sizeof buf; i++)
+ if (buf[i] != (byte) i)
+ return "ChaCha20 encryption test 3 failed.";
+
+ return NULL;
+}
+
+
+gcry_cipher_spec_t _gcry_cipher_spec_chacha20 = {
+ GCRY_CIPHER_CHACHA20,
+ {0, 0}, /* flags */
+ "CHACHA20", /* name */
+ NULL, /* aliases */
+ NULL, /* oids */
+ 1, /* blocksize in bytes. */
+ CHACHA20_MAX_KEY_SIZE * 8, /* standard key length in bits. */
+ sizeof (CHACHA20_context_t),
+ chacha20_setkey,
+ NULL,
+ NULL,
+ chacha20_encrypt_stream,
+ chacha20_encrypt_stream,
+ NULL,
+ NULL,
+ chacha20_setiv
+};
diff --git a/comm/third_party/libgcrypt/cipher/cipher-aeswrap.c b/comm/third_party/libgcrypt/cipher/cipher-aeswrap.c
new file mode 100644
index 0000000000..c182657e1f
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-aeswrap.c
@@ -0,0 +1,209 @@
+/* cipher-aeswrap.c - Generic AESWRAP mode implementation
+ * Copyright (C) 2009, 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+/* Perform the AES-Wrap algorithm as specified by RFC 3394.  We
+   implement this as a mode usable with any cipher algorithm with a
+   128-bit block size. */
+gcry_err_code_t
+_gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c,
+ byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen )
+{
+ int j, x;
+ size_t n, i;
+ unsigned char *r, *a, *b;
+ unsigned char t[8];
+ unsigned int burn, nburn;
+
+#if MAX_BLOCKSIZE < 8
+#error Invalid block size
+#endif
+ /* We require a cipher with a 128 bit block length. */
+ if (c->spec->blocksize != 16)
+ return GPG_ERR_INV_LENGTH;
+
+ /* The output buffer must be able to hold the input data plus one
+ additional block. */
+ if (outbuflen < inbuflen + 8)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ /* Input data must be multiple of 64 bits. */
+ if (inbuflen % 8)
+ return GPG_ERR_INV_ARG;
+
+ n = inbuflen / 8;
+
+ /* We need at least two 64 bit blocks. */
+ if (n < 2)
+ return GPG_ERR_INV_ARG;
+
+ burn = 0;
+
+ r = outbuf;
+ a = outbuf; /* We store A directly in OUTBUF. */
+ b = c->u_ctr.ctr; /* B is also used to concatenate stuff. */
+
+ /* Copy the inbuf to the outbuf. */
+ memmove (r+8, inbuf, inbuflen);
+
+ /* If an IV has been set we use that IV as the Alternative Initial
+ Value; if it has not been set we use the standard value. */
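+  /* (The standard value is the RFC 3394 default IV, eight 0xA6 bytes.) */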
+ if (c->marks.iv)
+ memcpy (a, c->u_iv.iv, 8);
+ else
+ memset (a, 0xa6, 8);
+
+ memset (t, 0, sizeof t); /* t := 0. */
+
+ for (j = 0; j <= 5; j++)
+ {
+ for (i = 1; i <= n; i++)
+ {
+ /* B := AES_k( A | R[i] ) */
+ memcpy (b, a, 8);
+ memcpy (b+8, r+i*8, 8);
+ nburn = c->spec->encrypt (&c->context.c, b, b);
+ burn = nburn > burn ? nburn : burn;
+ /* t := t + 1 */
+ for (x = 7; x >= 0; x--)
+ {
+ t[x]++;
+ if (t[x])
+ break;
+ }
+ /* A := MSB_64(B) ^ t */
+ cipher_block_xor(a, b, t, 8);
+ /* R[i] := LSB_64(B) */
+ memcpy (r+i*8, b+8, 8);
+ }
+ }
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return 0;
+}
+
+/* Perform the AES-Unwrap algorithm as specified by RFC 3394.  We
+   implement this as a mode usable with any cipher algorithm with a
+   128-bit block size. */
+gcry_err_code_t
+_gcry_cipher_aeswrap_decrypt (gcry_cipher_hd_t c,
+ byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen)
+{
+ int j, x;
+ size_t n, i;
+ unsigned char *r, *a, *b;
+ unsigned char t[8];
+ unsigned int burn, nburn;
+
+#if MAX_BLOCKSIZE < 8
+#error Invalid block size
+#endif
+ /* We require a cipher with a 128 bit block length. */
+ if (c->spec->blocksize != 16)
+ return GPG_ERR_INV_LENGTH;
+
+ /* The output buffer must be able to hold the input data minus one
+ additional block. Fixme: The caller has more restrictive checks
+ - we may want to fix them for this mode. */
+ if (outbuflen + 8 < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ /* Input data must be multiple of 64 bits. */
+ if (inbuflen % 8)
+ return GPG_ERR_INV_ARG;
+
+ n = inbuflen / 8;
+
+ /* We need at least three 64 bit blocks. */
+ if (n < 3)
+ return GPG_ERR_INV_ARG;
+
+ burn = 0;
+
+ r = outbuf;
+ a = c->lastiv; /* We use c->LASTIV as buffer for A. */
+ b = c->u_ctr.ctr; /* B is also used to concatenate stuff. */
+
+ /* Copy the inbuf to the outbuf and save A. */
+ memcpy (a, inbuf, 8);
+ memmove (r, inbuf+8, inbuflen-8);
+ n--; /* Reduce to actual number of data blocks. */
+
+ /* t := 6 * n */
+ i = n * 6; /* The range is valid because: n = inbuflen / 8 - 1. */
+ for (x=0; x < 8 && x < sizeof (i); x++)
+ t[7-x] = i >> (8*x);
+ for (; x < 8; x++)
+ t[7-x] = 0;
+
+ for (j = 5; j >= 0; j--)
+ {
+ for (i = n; i >= 1; i--)
+ {
+          /* B := AES_k^-1( (A ^ t) | R[i] ) */
+ cipher_block_xor(b, a, t, 8);
+ memcpy (b+8, r+(i-1)*8, 8);
+ nburn = c->spec->decrypt (&c->context.c, b, b);
+ burn = nburn > burn ? nburn : burn;
+ /* t := t - 1 */
+ for (x = 7; x >= 0; x--)
+ {
+ t[x]--;
+ if (t[x] != 0xff)
+ break;
+ }
+ /* A := MSB_64(B) */
+ memcpy (a, b, 8);
+ /* R[i] := LSB_64(B) */
+ memcpy (r+(i-1)*8, b+8, 8);
+ }
+ }
+
+ /* If an IV has been set we compare against this Alternative Initial
+ Value; if it has not been set we compare against the standard IV. */
+ if (c->marks.iv)
+ j = memcmp (a, c->u_iv.iv, 8);
+ else
+ {
+ for (j=0, x=0; x < 8; x++)
+ if (a[x] != 0xa6)
+ {
+ j=1;
+ break;
+ }
+ }
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return j? GPG_ERR_CHECKSUM : 0;
+}
diff --git a/comm/third_party/libgcrypt/cipher/cipher-cbc.c b/comm/third_party/libgcrypt/cipher/cipher-cbc.c
new file mode 100644
index 0000000000..d4df1e72aa
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-cbc.c
@@ -0,0 +1,292 @@
+/* cipher-cbc.c - Generic CBC mode implementation
+ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
+ * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "./cipher-internal.h"
+#include "bufhelp.h"
+
+
+
+static inline unsigned int
+cbc_encrypt_inner(gcry_cipher_hd_t c, unsigned char *outbuf,
+ const unsigned char *inbuf, size_t nblocks, size_t blocksize,
+ int is_cbc_cmac)
+{
+
+ unsigned int burn, nburn;
+ size_t n;
+
+ burn = 0;
+
+ if (c->bulk.cbc_enc)
+ {
+ c->bulk.cbc_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks,
+ is_cbc_cmac);
+ }
+ else
+ {
+ gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+ unsigned char *ivp;
+
+ ivp = c->u_iv.iv;
+
+ for (n=0; n < nblocks; n++ )
+ {
+ cipher_block_xor (outbuf, inbuf, ivp, blocksize);
+ nburn = enc_fn ( &c->context.c, outbuf, outbuf );
+ burn = nburn > burn ? nburn : burn;
+ ivp = outbuf;
+ inbuf += blocksize;
+ if (!is_cbc_cmac)
+ outbuf += blocksize;
+ }
+
+ if (ivp != c->u_iv.iv)
+ cipher_block_cpy (c->u_iv.iv, ivp, blocksize);
+ }
+
+ return burn;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cbc_encrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ size_t blocksize_shift = _gcry_blocksize_shift(c);
+ size_t blocksize = 1 << blocksize_shift;
+ size_t blocksize_mask = blocksize - 1;
+ size_t nblocks = inbuflen >> blocksize_shift;
+ int is_cbc_cmac = !!(c->flags & GCRY_CIPHER_CBC_MAC);
+ unsigned int burn;
+
+ if (outbuflen < (is_cbc_cmac ? blocksize : inbuflen))
+ return GPG_ERR_BUFFER_TOO_SHORT;
+
+ if (inbuflen & blocksize_mask)
+ return GPG_ERR_INV_LENGTH;
+
+ burn = cbc_encrypt_inner(c, outbuf, inbuf, nblocks, blocksize, is_cbc_cmac);
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cbc_cts_encrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ size_t blocksize_shift = _gcry_blocksize_shift(c);
+ size_t blocksize = 1 << blocksize_shift;
+ size_t blocksize_mask = blocksize - 1;
+ gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+ size_t nblocks = inbuflen >> blocksize_shift;
+ unsigned int burn, nburn;
+ unsigned char *ivp;
+ int i;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+
+ if ((inbuflen & blocksize_mask) && !(inbuflen > blocksize))
+ return GPG_ERR_INV_LENGTH;
+
+ burn = 0;
+
+ if (inbuflen > blocksize)
+ {
+ if ((inbuflen & blocksize_mask) == 0)
+ nblocks--;
+ }
+
+ burn = cbc_encrypt_inner(c, outbuf, inbuf, nblocks, blocksize, 0);
+ inbuf += nblocks << blocksize_shift;
+ outbuf += nblocks << blocksize_shift;
+
+ if (inbuflen > blocksize)
+ {
+ /* We have to be careful here, since outbuf might be equal to
+ inbuf. */
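+      /* Ciphertext stealing: the short final plaintext block is zero-padded,
+         XORed into the last full ciphertext block (the current IV) and
+         re-encrypted in place; the leading bytes of that last block are
+         moved to the end to become the final, short ciphertext block. */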
+ size_t restbytes;
+ unsigned char b;
+
+ if ((inbuflen & blocksize_mask) == 0)
+ restbytes = blocksize;
+ else
+ restbytes = inbuflen & blocksize_mask;
+
+ outbuf -= blocksize;
+ for (ivp = c->u_iv.iv, i = 0; i < restbytes; i++)
+ {
+ b = inbuf[i];
+ outbuf[blocksize + i] = outbuf[i];
+ outbuf[i] = b ^ *ivp++;
+ }
+ for (; i < blocksize; i++)
+ outbuf[i] = 0 ^ *ivp++;
+
+ nburn = enc_fn (&c->context.c, outbuf, outbuf);
+ burn = nburn > burn ? nburn : burn;
+ cipher_block_cpy (c->u_iv.iv, outbuf, blocksize);
+ }
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return 0;
+}
+
+
+static inline unsigned int
+cbc_decrypt_inner(gcry_cipher_hd_t c, unsigned char *outbuf,
+ const unsigned char *inbuf, size_t nblocks, size_t blocksize)
+{
+ unsigned int burn, nburn;
+ size_t n;
+
+ burn = 0;
+
+ if (c->bulk.cbc_dec)
+ {
+ c->bulk.cbc_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
+ }
+ else
+ {
+ gcry_cipher_decrypt_t dec_fn = c->spec->decrypt;
+
+ for (n = 0; n < nblocks; n++)
+ {
+ /* Because outbuf and inbuf might be the same, we must not overwrite
+ the original ciphertext block. We use LASTIV as intermediate
+ storage here because it is not used otherwise. */
+ nburn = dec_fn ( &c->context.c, c->lastiv, inbuf );
+ burn = nburn > burn ? nburn : burn;
+ cipher_block_xor_n_copy_2 (outbuf, c->lastiv, c->u_iv.iv, inbuf,
+ blocksize);
+ inbuf += blocksize;
+ outbuf += blocksize;
+ }
+ }
+
+ return burn;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cbc_decrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ size_t blocksize_shift = _gcry_blocksize_shift(c);
+ size_t blocksize = 1 << blocksize_shift;
+ size_t blocksize_mask = blocksize - 1;
+ size_t nblocks = inbuflen >> blocksize_shift;
+ unsigned int burn;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+
+ if (inbuflen & blocksize_mask)
+ return GPG_ERR_INV_LENGTH;
+
+ burn = cbc_decrypt_inner(c, outbuf, inbuf, nblocks, blocksize);
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cbc_cts_decrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ size_t blocksize_shift = _gcry_blocksize_shift(c);
+ size_t blocksize = 1 << blocksize_shift;
+ size_t blocksize_mask = blocksize - 1;
+ gcry_cipher_decrypt_t dec_fn = c->spec->decrypt;
+ size_t nblocks = inbuflen >> blocksize_shift;
+ unsigned int burn, nburn;
+ int i;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+
+ if ((inbuflen & blocksize_mask) && !(inbuflen > blocksize))
+ return GPG_ERR_INV_LENGTH;
+
+ burn = 0;
+
+ if (inbuflen > blocksize)
+ {
+ nblocks--;
+ if ((inbuflen & blocksize_mask) == 0)
+ nblocks--;
+ cipher_block_cpy (c->lastiv, c->u_iv.iv, blocksize);
+ }
+
+ burn = cbc_decrypt_inner(c, outbuf, inbuf, nblocks, blocksize);
+ inbuf += nblocks << blocksize_shift;
+ outbuf += nblocks << blocksize_shift;
+
+ if (inbuflen > blocksize)
+ {
+ size_t restbytes;
+
+ if ((inbuflen & blocksize_mask) == 0)
+ restbytes = blocksize;
+ else
+ restbytes = inbuflen & blocksize_mask;
+
+ cipher_block_cpy (c->lastiv, c->u_iv.iv, blocksize ); /* Save Cn-2. */
+ buf_cpy (c->u_iv.iv, inbuf + blocksize, restbytes ); /* Save Cn. */
+
+ nburn = dec_fn ( &c->context.c, outbuf, inbuf );
+ burn = nburn > burn ? nburn : burn;
+ buf_xor(outbuf, outbuf, c->u_iv.iv, restbytes);
+
+ buf_cpy (outbuf + blocksize, outbuf, restbytes);
+ for(i=restbytes; i < blocksize; i++)
+ c->u_iv.iv[i] = outbuf[i];
+ nburn = dec_fn (&c->context.c, outbuf, c->u_iv.iv);
+ burn = nburn > burn ? nburn : burn;
+ cipher_block_xor(outbuf, outbuf, c->lastiv, blocksize);
+ /* c->lastiv is now really lastlastiv, does this matter? */
+ }
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return 0;
+}
diff --git a/comm/third_party/libgcrypt/cipher/cipher-ccm.c b/comm/third_party/libgcrypt/cipher/cipher-ccm.c
new file mode 100644
index 0000000000..dcb268d084
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-ccm.c
@@ -0,0 +1,415 @@
+/* cipher-ccm.c - CTR mode with CBC-MAC mode implementation
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+#define set_burn(burn, nburn) do { \
+ unsigned int __nburn = (nburn); \
+ (burn) = (burn) > __nburn ? (burn) : __nburn; } while (0)
+
+
+static unsigned int
+do_cbc_mac (gcry_cipher_hd_t c, const unsigned char *inbuf, size_t inlen,
+ int do_padding)
+{
+ const unsigned int blocksize = 16;
+ gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+ unsigned char tmp[blocksize];
+ unsigned int burn = 0;
+ unsigned int unused = c->u_mode.ccm.mac_unused;
+ size_t nblocks;
+ size_t n;
+
+ if (inlen == 0 && (unused == 0 || !do_padding))
+ return 0;
+
+ do
+ {
+ if (inlen + unused < blocksize || unused > 0)
+ {
+ n = (inlen > blocksize - unused) ? blocksize - unused : inlen;
+
+ buf_cpy (&c->u_mode.ccm.macbuf[unused], inbuf, n);
+ unused += n;
+ inlen -= n;
+ inbuf += n;
+ }
+ if (!inlen)
+ {
+ if (!do_padding)
+ break;
+
+ n = blocksize - unused;
+ if (n > 0)
+ {
+ memset (&c->u_mode.ccm.macbuf[unused], 0, n);
+ unused = blocksize;
+ }
+ }
+
+ if (unused > 0)
+ {
+ /* Process one block from macbuf. */
+ cipher_block_xor(c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.macbuf,
+ blocksize);
+ set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ));
+
+ unused = 0;
+ }
+
+ if (c->bulk.cbc_enc)
+ {
+ nblocks = inlen / blocksize;
+ c->bulk.cbc_enc (&c->context.c, c->u_iv.iv, tmp, inbuf, nblocks, 1);
+ inbuf += nblocks * blocksize;
+ inlen -= nblocks * blocksize;
+
+ wipememory (tmp, sizeof(tmp));
+ }
+ else
+ {
+ while (inlen >= blocksize)
+ {
+ cipher_block_xor(c->u_iv.iv, c->u_iv.iv, inbuf, blocksize);
+
+ set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ));
+
+ inlen -= blocksize;
+ inbuf += blocksize;
+ }
+ }
+ }
+ while (inlen > 0);
+
+ c->u_mode.ccm.mac_unused = unused;
+
+ if (burn)
+ burn += 4 * sizeof(void *);
+
+ return burn;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_set_nonce (gcry_cipher_hd_t c, const unsigned char *nonce,
+ size_t noncelen)
+{
+ unsigned int marks_key;
+ size_t L = 15 - noncelen;
+ size_t L_;
+
+ L_ = L - 1;
+
+ if (!nonce)
+ return GPG_ERR_INV_ARG;
+ /* Length field must be 2, 3, ..., or 8. */
+ if (L < 2 || L > 8)
+ return GPG_ERR_INV_LENGTH;
+
+ /* Reset state */
+ marks_key = c->marks.key;
+ memset (&c->u_mode, 0, sizeof(c->u_mode));
+ memset (&c->marks, 0, sizeof(c->marks));
+ memset (&c->u_iv, 0, sizeof(c->u_iv));
+ memset (&c->u_ctr, 0, sizeof(c->u_ctr));
+ memset (c->lastiv, 0, sizeof(c->lastiv));
+ c->unused = 0;
+ c->marks.key = marks_key;
+
+ /* Setup CTR */
+ c->u_ctr.ctr[0] = L_;
+ memcpy (&c->u_ctr.ctr[1], nonce, noncelen);
+ memset (&c->u_ctr.ctr[1 + noncelen], 0, L);
+
+ /* Setup IV */
+ c->u_iv.iv[0] = L_;
+ memcpy (&c->u_iv.iv[1], nonce, noncelen);
+  /* Add (8 * M_ + 64 * flags) to iv[0] and set iv[noncelen + 1 ... 15] later
+     in _gcry_cipher_ccm_set_lengths.  */
+ memset (&c->u_iv.iv[1 + noncelen], 0, L);
+
+ c->u_mode.ccm.nonce = 1;
+
+ return GPG_ERR_NO_ERROR;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_set_lengths (gcry_cipher_hd_t c, u64 encryptlen, u64 aadlen,
+ u64 taglen)
+{
+ unsigned int burn = 0;
+ unsigned char b0[16];
+ size_t noncelen = 15 - (c->u_iv.iv[0] + 1);
+ u64 M = taglen;
+ u64 M_;
+ int i;
+
+ M_ = (M - 2) / 2;
+
+ /* Authentication field must be 4, 6, 8, 10, 12, 14 or 16. */
+ if ((M_ * 2 + 2) != M || M < 4 || M > 16)
+ return GPG_ERR_INV_LENGTH;
+ if (!c->u_mode.ccm.nonce || c->marks.tag)
+ return GPG_ERR_INV_STATE;
+ if (c->u_mode.ccm.lengths)
+ return GPG_ERR_INV_STATE;
+
+ c->u_mode.ccm.authlen = taglen;
+ c->u_mode.ccm.encryptlen = encryptlen;
+ c->u_mode.ccm.aadlen = aadlen;
+
+ /* Complete IV setup. */
+ c->u_iv.iv[0] += (aadlen > 0) * 64 + M_ * 8;
+ for (i = 16 - 1; i >= 1 + noncelen; i--)
+ {
+ c->u_iv.iv[i] = encryptlen & 0xff;
+ encryptlen >>= 8;
+ }
+
+ memcpy (b0, c->u_iv.iv, 16);
+ memset (c->u_iv.iv, 0, 16);
+
+ set_burn (burn, do_cbc_mac (c, b0, 16, 0));
+
+ if (aadlen == 0)
+ {
+ /* Do nothing. */
+ }
+ else if (aadlen > 0 && aadlen <= (unsigned int)0xfeff)
+ {
+ b0[0] = (aadlen >> 8) & 0xff;
+ b0[1] = aadlen & 0xff;
+ set_burn (burn, do_cbc_mac (c, b0, 2, 0));
+ }
+ else if (aadlen > 0xfeff && aadlen <= (unsigned int)0xffffffff)
+ {
+ b0[0] = 0xff;
+ b0[1] = 0xfe;
+ buf_put_be32(&b0[2], aadlen);
+ set_burn (burn, do_cbc_mac (c, b0, 6, 0));
+ }
+ else if (aadlen > (unsigned int)0xffffffff)
+ {
+ b0[0] = 0xff;
+ b0[1] = 0xff;
+ buf_put_be64(&b0[2], aadlen);
+ set_burn (burn, do_cbc_mac (c, b0, 10, 0));
+ }
+
+ /* Generate S_0 and increase counter. */
+ set_burn (burn, c->spec->encrypt ( &c->context.c, c->u_mode.ccm.s0,
+ c->u_ctr.ctr ));
+ c->u_ctr.ctr[15]++;
+
+ if (burn)
+ _gcry_burn_stack (burn + sizeof(void *) * 5);
+
+ c->u_mode.ccm.lengths = 1;
+
+ return GPG_ERR_NO_ERROR;
+}
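+
+/* Worked example for the flag octet completed above, assuming a 12-byte
+ * nonce and a 16-byte tag with non-empty AAD: L = 3 so L_ = 2 was stored by
+ * set_nonce, M_ = 7, and iv[0] becomes 64 + 7*8 + 2 = 0x7a; the last three
+ * IV octets then receive the big-endian message length, giving the B_0
+ * block of RFC 3610.  */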
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
+ size_t abuflen)
+{
+ unsigned int burn;
+
+ if (abuflen > 0 && !abuf)
+ return GPG_ERR_INV_ARG;
+ if (!c->u_mode.ccm.nonce || !c->u_mode.ccm.lengths || c->marks.tag)
+ return GPG_ERR_INV_STATE;
+ if (abuflen > c->u_mode.ccm.aadlen)
+ return GPG_ERR_INV_LENGTH;
+
+ c->u_mode.ccm.aadlen -= abuflen;
+ burn = do_cbc_mac (c, abuf, abuflen, c->u_mode.ccm.aadlen == 0);
+
+ if (burn)
+ _gcry_burn_stack (burn + sizeof(void *) * 5);
+
+ return GPG_ERR_NO_ERROR;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_tag (gcry_cipher_hd_t c, unsigned char *outbuf,
+ size_t outbuflen, int check)
+{
+ unsigned int burn;
+
+ if (!outbuf || outbuflen == 0)
+ return GPG_ERR_INV_ARG;
+  /* Tag length must be the same as the initial authlen. */
+ if (c->u_mode.ccm.authlen != outbuflen)
+ return GPG_ERR_INV_LENGTH;
+ if (!c->u_mode.ccm.nonce || !c->u_mode.ccm.lengths || c->u_mode.ccm.aadlen > 0)
+ return GPG_ERR_INV_STATE;
+  /* Initial encrypt length must match the length of the actual data processed. */
+ if (c->u_mode.ccm.encryptlen > 0)
+ return GPG_ERR_UNFINISHED;
+
+ if (!c->marks.tag)
+ {
+ burn = do_cbc_mac (c, NULL, 0, 1); /* Perform final padding. */
+
+ /* Add S_0 */
+ cipher_block_xor (c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.s0, 16);
+
+ wipememory (c->u_ctr.ctr, 16);
+ wipememory (c->u_mode.ccm.s0, 16);
+ wipememory (c->u_mode.ccm.macbuf, 16);
+
+ if (burn)
+ _gcry_burn_stack (burn + sizeof(void *) * 5);
+
+ c->marks.tag = 1;
+ }
+
+ if (!check)
+ {
+ memcpy (outbuf, c->u_iv.iv, outbuflen);
+ return GPG_ERR_NO_ERROR;
+ }
+ else
+ {
+ return buf_eq_const(outbuf, c->u_iv.iv, outbuflen) ?
+ GPG_ERR_NO_ERROR : GPG_ERR_CHECKSUM;
+ }
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_get_tag (gcry_cipher_hd_t c, unsigned char *outtag,
+ size_t taglen)
+{
+ return _gcry_cipher_ccm_tag (c, outtag, taglen, 0);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_check_tag (gcry_cipher_hd_t c, const unsigned char *intag,
+ size_t taglen)
+{
+ return _gcry_cipher_ccm_tag (c, (unsigned char *)intag, taglen, 1);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_encrypt (gcry_cipher_hd_t c, unsigned char *outbuf,
+ size_t outbuflen, const unsigned char *inbuf,
+ size_t inbuflen)
+{
+ gcry_err_code_t err = 0;
+ unsigned int burn = 0;
+ unsigned int nburn;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ if (!c->u_mode.ccm.nonce || c->marks.tag || !c->u_mode.ccm.lengths ||
+ c->u_mode.ccm.aadlen > 0)
+ return GPG_ERR_INV_STATE;
+ if (inbuflen > c->u_mode.ccm.encryptlen)
+ return GPG_ERR_INV_LENGTH;
+
+ while (inbuflen)
+ {
+ size_t currlen = inbuflen;
+
+ /* Since checksumming is done before encryption, process input in 24KiB
+ * chunks to keep data loaded in L1 cache for encryption. */
+ if (currlen > 24 * 1024)
+ currlen = 24 * 1024;
+
+ c->u_mode.ccm.encryptlen -= currlen;
+ nburn = do_cbc_mac (c, inbuf, currlen, 0);
+ burn = nburn > burn ? nburn : burn;
+
+ err = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, currlen);
+ if (err)
+ break;
+
+ outbuf += currlen;
+ inbuf += currlen;
+ outbuflen -= currlen;
+ inbuflen -= currlen;
+ }
+
+ if (burn)
+ _gcry_burn_stack (burn + sizeof(void *) * 5);
+ return err;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_decrypt (gcry_cipher_hd_t c, unsigned char *outbuf,
+ size_t outbuflen, const unsigned char *inbuf,
+ size_t inbuflen)
+{
+ gcry_err_code_t err = 0;
+ unsigned int burn = 0;
+ unsigned int nburn;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ if (!c->u_mode.ccm.nonce || c->marks.tag || !c->u_mode.ccm.lengths ||
+ c->u_mode.ccm.aadlen > 0)
+ return GPG_ERR_INV_STATE;
+ if (inbuflen > c->u_mode.ccm.encryptlen)
+ return GPG_ERR_INV_LENGTH;
+
+ while (inbuflen)
+ {
+ size_t currlen = inbuflen;
+
+ /* Since checksumming is done after decryption, process input in 24KiB
+ * chunks to keep data loaded in L1 cache for checksumming. */
+ if (currlen > 24 * 1024)
+ currlen = 24 * 1024;
+
+ err = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, currlen);
+ if (err)
+ break;
+
+ c->u_mode.ccm.encryptlen -= currlen;
+ nburn = do_cbc_mac (c, outbuf, currlen, 0);
+ burn = nburn > burn ? nburn : burn;
+
+ outbuf += currlen;
+ inbuf += currlen;
+ outbuflen -= currlen;
+ inbuflen -= currlen;
+ }
+
+ if (burn)
+ _gcry_burn_stack (burn + sizeof(void *) * 5);
+ return err;
+}
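+
+/* A minimal usage sketch for the CCM mode implemented above, assuming the
+ * public gcry_cipher_* front-end (#include <gcrypt.h>); the helper name and
+ * fixed sizes are illustrative only.  CCM requires the total lengths to be
+ * declared up front via GCRYCTL_SET_CCM_LENGTHS before any data is fed.  */
+#if 0
+static gcry_error_t
+example_ccm_encrypt (gcry_cipher_hd_t hd,        /* opened with MODE_CCM */
+                     const unsigned char *nonce, size_t noncelen,
+                     const unsigned char *aad, size_t aadlen,
+                     unsigned char *data, size_t datalen,
+                     unsigned char tag[16])
+{
+  u64 params[3] = { datalen, aadlen, 16 }; /* encryptlen, aadlen, taglen */
+  gcry_error_t err;
+
+  err = gcry_cipher_setiv (hd, nonce, noncelen);
+  if (!err)
+    err = gcry_cipher_ctl (hd, GCRYCTL_SET_CCM_LENGTHS, params,
+                           sizeof (params));
+  if (!err)
+    err = gcry_cipher_authenticate (hd, aad, aadlen);
+  if (!err)
+    err = gcry_cipher_encrypt (hd, data, datalen, NULL, 0); /* in-place */
+  if (!err)
+    err = gcry_cipher_gettag (hd, tag, 16);
+  return err;
+}
+#endif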
diff --git a/comm/third_party/libgcrypt/cipher/cipher-cfb.c b/comm/third_party/libgcrypt/cipher/cipher-cfb.c
new file mode 100644
index 0000000000..012c6c13c3
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-cfb.c
@@ -0,0 +1,317 @@
+/* cipher-cfb.c - Generic CFB mode implementation
+ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
+ * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+gcry_err_code_t
+_gcry_cipher_cfb_encrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ unsigned char *ivp;
+ gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+ size_t blocksize_shift = _gcry_blocksize_shift(c);
+ size_t blocksize = 1 << blocksize_shift;
+ size_t blocksize_x_2 = blocksize + blocksize;
+ unsigned int burn, nburn;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+
+ if ( inbuflen <= c->unused )
+ {
+ /* Short enough to be encoded by the remaining XOR mask. */
+ /* XOR the input with the IV and store input into IV. */
+ ivp = c->u_iv.iv + blocksize - c->unused;
+ buf_xor_2dst(outbuf, ivp, inbuf, inbuflen);
+ c->unused -= inbuflen;
+ return 0;
+ }
+
+ burn = 0;
+
+ if ( c->unused )
+ {
+ /* XOR the input with the IV and store input into IV */
+ inbuflen -= c->unused;
+ ivp = c->u_iv.iv + blocksize - c->unused;
+ buf_xor_2dst(outbuf, ivp, inbuf, c->unused);
+ outbuf += c->unused;
+ inbuf += c->unused;
+ c->unused = 0;
+ }
+
+  /* Now we can process complete blocks.  We use a loop as long as we
+     have at least 2 blocks and use conditions for the rest.  This
+     also allows us to use a bulk encryption function if available.  */
+ if (inbuflen >= blocksize_x_2 && c->bulk.cfb_enc)
+ {
+ size_t nblocks = inbuflen >> blocksize_shift;
+ c->bulk.cfb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
+ outbuf += nblocks << blocksize_shift;
+ inbuf += nblocks << blocksize_shift;
+ inbuflen -= nblocks << blocksize_shift;
+ }
+ else
+ {
+ while ( inbuflen >= blocksize_x_2 )
+ {
+ /* Encrypt the IV. */
+ nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ burn = nburn > burn ? nburn : burn;
+ /* XOR the input with the IV and store input into IV. */
+ cipher_block_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize);
+ outbuf += blocksize;
+ inbuf += blocksize;
+ inbuflen -= blocksize;
+ }
+ }
+
+ if ( inbuflen >= blocksize )
+ {
+ /* Save the current IV and then encrypt the IV. */
+ cipher_block_cpy( c->lastiv, c->u_iv.iv, blocksize );
+ nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ burn = nburn > burn ? nburn : burn;
+ /* XOR the input with the IV and store input into IV */
+ cipher_block_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize);
+ outbuf += blocksize;
+ inbuf += blocksize;
+ inbuflen -= blocksize;
+ }
+ if ( inbuflen )
+ {
+ /* Save the current IV and then encrypt the IV. */
+ cipher_block_cpy( c->lastiv, c->u_iv.iv, blocksize );
+ nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ burn = nburn > burn ? nburn : burn;
+ c->unused = blocksize;
+ /* Apply the XOR. */
+ c->unused -= inbuflen;
+ buf_xor_2dst(outbuf, c->u_iv.iv, inbuf, inbuflen);
+ outbuf += inbuflen;
+ inbuf += inbuflen;
+ inbuflen = 0;
+ }
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cfb_decrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ unsigned char *ivp;
+ gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+ size_t blocksize_shift = _gcry_blocksize_shift(c);
+ size_t blocksize = 1 << blocksize_shift;
+ size_t blocksize_x_2 = blocksize + blocksize;
+ unsigned int burn, nburn;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+
+ if (inbuflen <= c->unused)
+ {
+ /* Short enough to be encoded by the remaining XOR mask. */
+ /* XOR the input with the IV and store input into IV. */
+ ivp = c->u_iv.iv + blocksize - c->unused;
+ buf_xor_n_copy(outbuf, ivp, inbuf, inbuflen);
+ c->unused -= inbuflen;
+ return 0;
+ }
+
+ burn = 0;
+
+ if (c->unused)
+ {
+ /* XOR the input with the IV and store input into IV. */
+ inbuflen -= c->unused;
+ ivp = c->u_iv.iv + blocksize - c->unused;
+ buf_xor_n_copy(outbuf, ivp, inbuf, c->unused);
+ outbuf += c->unused;
+ inbuf += c->unused;
+ c->unused = 0;
+ }
+
+  /* Now we can process complete blocks.  We use a loop as long as we
+     have at least 2 blocks and use conditions for the rest.  This
+     also allows us to use a bulk encryption function if available.  */
+ if (inbuflen >= blocksize_x_2 && c->bulk.cfb_dec)
+ {
+ size_t nblocks = inbuflen >> blocksize_shift;
+ c->bulk.cfb_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
+ outbuf += nblocks << blocksize_shift;
+ inbuf += nblocks << blocksize_shift;
+ inbuflen -= nblocks << blocksize_shift;
+ }
+ else
+ {
+ while (inbuflen >= blocksize_x_2 )
+ {
+ /* Encrypt the IV. */
+ nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ burn = nburn > burn ? nburn : burn;
+ /* XOR the input with the IV and store input into IV. */
+ cipher_block_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize);
+ outbuf += blocksize;
+ inbuf += blocksize;
+ inbuflen -= blocksize;
+ }
+ }
+
+ if (inbuflen >= blocksize )
+ {
+ /* Save the current IV and then encrypt the IV. */
+ cipher_block_cpy ( c->lastiv, c->u_iv.iv, blocksize);
+ nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ burn = nburn > burn ? nburn : burn;
+ /* XOR the input with the IV and store input into IV */
+ cipher_block_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize);
+ outbuf += blocksize;
+ inbuf += blocksize;
+ inbuflen -= blocksize;
+ }
+
+ if (inbuflen)
+ {
+ /* Save the current IV and then encrypt the IV. */
+ cipher_block_cpy ( c->lastiv, c->u_iv.iv, blocksize );
+ nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ burn = nburn > burn ? nburn : burn;
+ c->unused = blocksize;
+ /* Apply the XOR. */
+ c->unused -= inbuflen;
+ buf_xor_n_copy(outbuf, c->u_iv.iv, inbuf, inbuflen);
+ outbuf += inbuflen;
+ inbuf += inbuflen;
+ inbuflen = 0;
+ }
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cfb8_encrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+ size_t blocksize = c->spec->blocksize;
+ unsigned int burn, nburn;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+
+ burn = 0;
+
+ while ( inbuflen > 0)
+ {
+ int i;
+
+ /* Encrypt the IV. */
+ nburn = enc_fn ( &c->context.c, c->lastiv, c->u_iv.iv );
+ burn = nburn > burn ? nburn : burn;
+
+ outbuf[0] = c->lastiv[0] ^ inbuf[0];
+
+      /* Shift the IV left by 8 bits. */
+ for (i = 0; i < blocksize-1; i++)
+ c->u_iv.iv[i] = c->u_iv.iv[i+1];
+
+      /* Append the ciphertext byte to the IV. */
+ c->u_iv.iv[blocksize-1] = outbuf[0];
+
+ outbuf += 1;
+ inbuf += 1;
+ inbuflen -= 1;
+ }
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cfb8_decrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+ size_t blocksize = c->spec->blocksize;
+ unsigned int burn, nburn;
+ unsigned char appendee;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+
+ burn = 0;
+
+ while (inbuflen > 0)
+ {
+ int i;
+
+ /* Encrypt the IV. */
+ nburn = enc_fn ( &c->context.c, c->lastiv, c->u_iv.iv );
+ burn = nburn > burn ? nburn : burn;
+
+      /* INBUF might equal OUTBUF, so keep the input byte around so that
+         we can append it to the IV later. */
+ appendee = inbuf[0];
+
+ outbuf[0] = inbuf[0] ^ c->lastiv[0];
+
+      /* Shift the IV left by 8 bits. */
+ for (i = 0; i < blocksize-1; i++)
+ c->u_iv.iv[i] = c->u_iv.iv[i+1];
+
+ c->u_iv.iv[blocksize-1] = appendee;
+
+ outbuf += 1;
+ inbuf += 1;
+ inbuflen -= 1;
+ }
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return 0;
+}
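+
+/* In other words, CFB-8 above runs the full block cipher once per input
+ * byte: the leading byte of the encrypted IV is XORed with one data byte,
+ * then the IV is shifted left by eight bits and the resulting ciphertext
+ * byte is appended, so the shift register always holds the most recent
+ * `blocksize' ciphertext bytes.  */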
diff --git a/comm/third_party/libgcrypt/cipher/cipher-cmac.c b/comm/third_party/libgcrypt/cipher/cipher-cmac.c
new file mode 100644
index 0000000000..4efd1e19b4
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-cmac.c
@@ -0,0 +1,292 @@
+/* cipher-cmac.c - CMAC, Cipher-based MAC.
+ * Copyright (C) 2013,2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+
+#define set_burn(burn, nburn) do { \
+ unsigned int __nburn = (nburn); \
+ (burn) = (burn) > __nburn ? (burn) : __nburn; } while (0)
+
+
+gcry_err_code_t
+_gcry_cmac_write (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx,
+ const byte * inbuf, size_t inlen)
+{
+ gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+ size_t blocksize_shift = _gcry_blocksize_shift(c);
+ size_t blocksize = 1 << blocksize_shift;
+ byte outbuf[MAX_BLOCKSIZE];
+ unsigned int burn = 0;
+ unsigned int nblocks;
+ size_t n;
+
+ if (ctx->tag)
+ return GPG_ERR_INV_STATE;
+
+ if (!inbuf)
+ return GPG_ERR_INV_ARG;
+
+ if (inlen == 0)
+ return 0;
+
+  /* The last block is needed for cmac_final. */
+ if (ctx->mac_unused + inlen <= blocksize)
+ {
+ buf_cpy (&ctx->macbuf[ctx->mac_unused], inbuf, inlen);
+ ctx->mac_unused += inlen;
+ inbuf += inlen;
+ inlen -= inlen;
+
+ return 0;
+ }
+
+ if (ctx->mac_unused)
+ {
+ n = inlen;
+ if (n > blocksize - ctx->mac_unused)
+ n = blocksize - ctx->mac_unused;
+
+ buf_cpy (&ctx->macbuf[ctx->mac_unused], inbuf, n);
+ ctx->mac_unused += n;
+ inbuf += n;
+ inlen -= n;
+
+ cipher_block_xor (ctx->u_iv.iv, ctx->u_iv.iv, ctx->macbuf, blocksize);
+ set_burn (burn, enc_fn (&c->context.c, ctx->u_iv.iv, ctx->u_iv.iv));
+
+ ctx->mac_unused = 0;
+ }
+
+ if (c->bulk.cbc_enc && inlen > blocksize)
+ {
+ nblocks = inlen >> blocksize_shift;
+ nblocks -= ((nblocks << blocksize_shift) == inlen);
+
+ c->bulk.cbc_enc (&c->context.c, ctx->u_iv.iv, outbuf, inbuf, nblocks, 1);
+ inbuf += nblocks << blocksize_shift;
+ inlen -= nblocks << blocksize_shift;
+
+ wipememory (outbuf, sizeof (outbuf));
+ }
+ else
+ while (inlen > blocksize)
+ {
+ cipher_block_xor (ctx->u_iv.iv, ctx->u_iv.iv, inbuf, blocksize);
+ set_burn (burn, enc_fn (&c->context.c, ctx->u_iv.iv, ctx->u_iv.iv));
+ inlen -= blocksize;
+ inbuf += blocksize;
+ }
+
+  /* Make sure that the last block is passed to cmac_final. */
+ if (inlen == 0)
+ BUG ();
+
+ n = inlen;
+ if (n > blocksize - ctx->mac_unused)
+ n = blocksize - ctx->mac_unused;
+
+ buf_cpy (&ctx->macbuf[ctx->mac_unused], inbuf, n);
+ ctx->mac_unused += n;
+ inbuf += n;
+ inlen -= n;
+
+ if (burn)
+ _gcry_burn_stack (burn + 4 * sizeof (void *));
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cmac_generate_subkeys (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx)
+{
+ const unsigned int blocksize = c->spec->blocksize;
+ byte rb, carry, t, bi;
+ unsigned int burn;
+ int i, j;
+ union
+ {
+ size_t _aligned;
+ byte buf[MAX_BLOCKSIZE];
+ } u;
+
+  /* Tell the compiler that we require a cipher with a 64-bit or 128-bit
+   * block length, to allow better optimization of this function. */
+ if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1))
+ return GPG_ERR_INV_CIPHER_MODE;
+
+ if (MAX_BLOCKSIZE < blocksize)
+ BUG ();
+
+ /* encrypt zero block */
+ memset (u.buf, 0, blocksize);
+ burn = c->spec->encrypt (&c->context.c, u.buf, u.buf);
+
+ /* Currently supported blocksizes are 16 and 8. */
+ rb = blocksize == 16 ? 0x87 : 0x1B /* blocksize == 8 */ ;
+
+ for (j = 0; j < 2; j++)
+ {
+ /* Generate subkeys K1 and K2 */
+ carry = 0;
+ for (i = blocksize - 1; i >= 0; i--)
+ {
+ bi = u.buf[i];
+ t = carry | (bi << 1);
+ carry = bi >> 7;
+ u.buf[i] = t & 0xff;
+ ctx->subkeys[j][i] = u.buf[i];
+ }
+ u.buf[blocksize - 1] ^= carry ? rb : 0;
+ ctx->subkeys[j][blocksize - 1] = u.buf[blocksize - 1];
+ }
+
+ wipememory (&u, sizeof (u));
+ if (burn)
+ _gcry_burn_stack (burn + 4 * sizeof (void *));
+
+ return 0;
+}
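+
+/* Worked example of the doubling above for blocksize 16: with
+ * L = E_K(0^128), K1 = (L << 1) XOR 0x87 if the top bit of L was set, and
+ * K1 = L << 1 otherwise; K2 is derived from K1 in the same way.  This is
+ * the GF(2^128) doubling specified for CMAC in NIST SP 800-38B / RFC 4493. */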
+
+
+gcry_err_code_t
+_gcry_cmac_final (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx)
+{
+ const unsigned int blocksize = c->spec->blocksize;
+ unsigned int count = ctx->mac_unused;
+ unsigned int burn;
+ byte *subkey;
+
+  /* Tell the compiler that we require a cipher with a 64-bit or 128-bit
+   * block length, to allow better optimization of this function. */
+ if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1))
+ return GPG_ERR_INV_CIPHER_MODE;
+
+ if (count == blocksize)
+ subkey = ctx->subkeys[0]; /* K1 */
+ else
+ {
+ subkey = ctx->subkeys[1]; /* K2 */
+ ctx->macbuf[count++] = 0x80;
+ while (count < blocksize)
+ ctx->macbuf[count++] = 0;
+ }
+
+ cipher_block_xor (ctx->macbuf, ctx->macbuf, subkey, blocksize);
+
+ cipher_block_xor (ctx->u_iv.iv, ctx->u_iv.iv, ctx->macbuf, blocksize);
+ burn = c->spec->encrypt (&c->context.c, ctx->u_iv.iv, ctx->u_iv.iv);
+ if (burn)
+ _gcry_burn_stack (burn + 4 * sizeof (void *));
+
+ ctx->mac_unused = 0;
+
+ return 0;
+}
+
+
+static gcry_err_code_t
+cmac_tag (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx,
+ unsigned char *tag, size_t taglen, int check)
+{
+ gcry_err_code_t ret;
+
+ if (!tag || taglen == 0 || taglen > c->spec->blocksize)
+ return GPG_ERR_INV_ARG;
+
+ if (!ctx->tag)
+ {
+ ret = _gcry_cmac_final (c, ctx);
+ if (ret != 0)
+ return ret;
+
+ ctx->tag = 1;
+ }
+
+ if (!check)
+ {
+ memcpy (tag, ctx->u_iv.iv, taglen);
+ return GPG_ERR_NO_ERROR;
+ }
+ else
+ {
+ return buf_eq_const (tag, ctx->u_iv.iv, taglen) ?
+ GPG_ERR_NO_ERROR : GPG_ERR_CHECKSUM;
+ }
+}
+
+
+void
+_gcry_cmac_reset (gcry_cmac_context_t *ctx)
+{
+ char tmp_buf[sizeof(ctx->subkeys)];
+
+  /* Only keep the subkeys when resetting the context. */
+
+ buf_cpy (tmp_buf, ctx->subkeys, sizeof(ctx->subkeys));
+ memset (ctx, 0, sizeof(*ctx));
+ buf_cpy (ctx->subkeys, tmp_buf, sizeof(ctx->subkeys));
+ wipememory (tmp_buf, sizeof(tmp_buf));
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cmac_authenticate (gcry_cipher_hd_t c,
+ const unsigned char *abuf, size_t abuflen)
+{
+ if (abuflen > 0 && !abuf)
+ return GPG_ERR_INV_ARG;
+  /* To support a new blocksize, update cmac_generate_subkeys() and then add
+     the new blocksize here. */
+ if (c->spec->blocksize != 16 && c->spec->blocksize != 8)
+ return GPG_ERR_INV_CIPHER_MODE;
+
+ return _gcry_cmac_write (c, &c->u_mode.cmac, abuf, abuflen);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cmac_get_tag (gcry_cipher_hd_t c,
+ unsigned char *outtag, size_t taglen)
+{
+ return cmac_tag (c, &c->u_mode.cmac, outtag, taglen, 0);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cmac_check_tag (gcry_cipher_hd_t c,
+ const unsigned char *intag, size_t taglen)
+{
+ return cmac_tag (c, &c->u_mode.cmac, (unsigned char *) intag, taglen, 1);
+}
+
+gcry_err_code_t
+_gcry_cipher_cmac_set_subkeys (gcry_cipher_hd_t c)
+{
+ return _gcry_cmac_generate_subkeys (c, &c->u_mode.cmac);
+}
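+
+/* A minimal usage sketch, assuming the public gcry_mac_* front-end
+ * (#include <gcrypt.h>), which drives the CMAC routines above through the
+ * MAC API; the helper name and AES-128 key size are illustrative only.  */
+#if 0
+static gcry_error_t
+example_cmac_aes (const unsigned char key[16],
+                  const void *data, size_t datalen,
+                  unsigned char mac[16])
+{
+  gcry_mac_hd_t hd;
+  size_t maclen = 16;
+  gcry_error_t err;
+
+  err = gcry_mac_open (&hd, GCRY_MAC_CMAC_AES, 0, NULL);
+  if (err)
+    return err;
+  err = gcry_mac_setkey (hd, key, 16);
+  if (!err)
+    err = gcry_mac_write (hd, data, datalen);
+  if (!err)
+    err = gcry_mac_read (hd, mac, &maclen);
+  gcry_mac_close (hd);
+  return err;
+}
+#endif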
diff --git a/comm/third_party/libgcrypt/cipher/cipher-ctr.c b/comm/third_party/libgcrypt/cipher/cipher-ctr.c
new file mode 100644
index 0000000000..5f0afc2f88
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-ctr.c
@@ -0,0 +1,120 @@
+/* cipher-ctr.c - Generic CTR mode implementation
+ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
+ * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+gcry_err_code_t
+_gcry_cipher_ctr_encrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ size_t n;
+ int i;
+ gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+ size_t blocksize_shift = _gcry_blocksize_shift(c);
+ size_t blocksize = 1 << blocksize_shift;
+ size_t nblocks;
+ unsigned int burn, nburn;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+
+ burn = 0;
+
+  /* First process a left-over encrypted counter.  */
+ if (c->unused)
+ {
+ gcry_assert (c->unused < blocksize);
+ i = blocksize - c->unused;
+ n = c->unused > inbuflen ? inbuflen : c->unused;
+ buf_xor(outbuf, inbuf, &c->lastiv[i], n);
+ c->unused -= n;
+ inbuf += n;
+ outbuf += n;
+ inbuflen -= n;
+ }
+
+ /* Use a bulk method if available. */
+ nblocks = inbuflen >> blocksize_shift;
+ if (nblocks && c->bulk.ctr_enc)
+ {
+ c->bulk.ctr_enc (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks);
+ inbuf += nblocks << blocksize_shift;
+ outbuf += nblocks << blocksize_shift;
+ inbuflen -= nblocks << blocksize_shift;
+ }
+
+  /* If we don't have a bulk method, use the standard method.  We also
+     use this method for a remaining partial block.  */
+ if (inbuflen)
+ {
+ unsigned char tmp[MAX_BLOCKSIZE];
+
+ n = blocksize;
+ do
+ {
+ nburn = enc_fn (&c->context.c, tmp, c->u_ctr.ctr);
+ burn = nburn > burn ? nburn : burn;
+
+ cipher_block_add(c->u_ctr.ctr, 1, blocksize);
+
+ if (inbuflen < blocksize)
+ break;
+ cipher_block_xor(outbuf, inbuf, tmp, blocksize);
+
+ inbuflen -= n;
+ outbuf += n;
+ inbuf += n;
+ }
+ while (inbuflen);
+
+ if (inbuflen)
+ {
+ n = inbuflen;
+ buf_xor(outbuf, inbuf, tmp, inbuflen);
+
+ inbuflen -= n;
+ outbuf += n;
+ inbuf += n;
+ }
+
+ /* Save the unused bytes of the counter. */
+ c->unused = blocksize - n;
+ if (c->unused)
+ buf_cpy (c->lastiv+n, tmp+n, c->unused);
+
+ wipememory (tmp, sizeof tmp);
+ }
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return 0;
+}
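+
+/* A minimal standalone sketch of the big-endian counter addition performed
+ * by cipher_block_add() above (illustrative only; the real helper lives in
+ * cipher-internal.h and may be implemented differently).  */
+#if 0
+static void
+example_ctr_add (unsigned char *ctr, size_t blocksize, unsigned int add)
+{
+  unsigned int carry = add;
+  int i;
+
+  /* Propagate the addition from the least significant (last) byte.  */
+  for (i = (int)blocksize - 1; i >= 0 && carry; i--)
+    {
+      carry += ctr[i];
+      ctr[i] = carry & 0xff;
+      carry >>= 8;
+    }
+}
+#endif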
diff --git a/comm/third_party/libgcrypt/cipher/cipher-eax.c b/comm/third_party/libgcrypt/cipher/cipher-eax.c
new file mode 100644
index 0000000000..08f815a9e4
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-eax.c
@@ -0,0 +1,289 @@
+/* cipher-eax.c - EAX implementation
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+gcry_err_code_t
+_gcry_cipher_eax_encrypt (gcry_cipher_hd_t c,
+ byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen)
+{
+ gcry_err_code_t err;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ if (c->marks.tag)
+ return GPG_ERR_INV_STATE;
+
+ if (!c->marks.iv)
+ {
+ err = _gcry_cipher_eax_set_nonce (c, NULL, 0);
+ if (err != 0)
+ return err;
+ }
+
+ while (inbuflen)
+ {
+ size_t currlen = inbuflen;
+
+ /* Since checksumming is done after encryption, process input in 24KiB
+ * chunks to keep data loaded in L1 cache for checksumming. */
+ if (currlen > 24 * 1024)
+ currlen = 24 * 1024;
+
+ err = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, currlen);
+ if (err != 0)
+ return err;
+
+ err = _gcry_cmac_write (c, &c->u_mode.eax.cmac_ciphertext, outbuf,
+ currlen);
+ if (err != 0)
+ return err;
+
+ outbuf += currlen;
+ inbuf += currlen;
+ outbuflen -= currlen;
+ inbuflen -= currlen;
+ }
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_eax_decrypt (gcry_cipher_hd_t c,
+ byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen)
+{
+ gcry_err_code_t err;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ if (c->marks.tag)
+ return GPG_ERR_INV_STATE;
+
+ if (!c->marks.iv)
+ {
+ err = _gcry_cipher_eax_set_nonce (c, NULL, 0);
+ if (err != 0)
+ return err;
+ }
+
+ while (inbuflen)
+ {
+ size_t currlen = inbuflen;
+
+ /* Since checksumming is done before decryption, process input in 24KiB
+ * chunks to keep data loaded in L1 cache for decryption. */
+ if (currlen > 24 * 1024)
+ currlen = 24 * 1024;
+
+ err = _gcry_cmac_write (c, &c->u_mode.eax.cmac_ciphertext, inbuf,
+ currlen);
+ if (err != 0)
+ return err;
+
+ err = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, currlen);
+ if (err != 0)
+ return err;
+
+ outbuf += currlen;
+ inbuf += currlen;
+ outbuflen -= currlen;
+ inbuflen -= currlen;
+ }
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_eax_authenticate (gcry_cipher_hd_t c,
+ const byte * aadbuf, size_t aadbuflen)
+{
+ gcry_err_code_t err;
+
+ if (c->marks.tag)
+ return GPG_ERR_INV_STATE;
+
+ if (!c->marks.iv)
+ {
+ err = _gcry_cipher_eax_set_nonce (c, NULL, 0);
+ if (err != 0)
+ return err;
+ }
+
+ return _gcry_cmac_write (c, &c->u_mode.eax.cmac_header, aadbuf, aadbuflen);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_eax_setkey (gcry_cipher_hd_t c)
+{
+ gcry_err_code_t err;
+
+ err = _gcry_cmac_generate_subkeys (c, &c->u_mode.eax.cmac_header);
+ if (err != 0)
+ return err;
+
+ buf_cpy (c->u_mode.eax.cmac_ciphertext.subkeys,
+ c->u_mode.eax.cmac_header.subkeys,
+ sizeof(c->u_mode.eax.cmac_header.subkeys));
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_eax_set_nonce (gcry_cipher_hd_t c, const byte *nonce,
+ size_t noncelen)
+{
+ gcry_cmac_context_t nonce_cmac;
+ unsigned char initbuf[MAX_BLOCKSIZE];
+ gcry_err_code_t err;
+
+ c->marks.iv = 0;
+ c->marks.tag = 0;
+
+ _gcry_cmac_reset (&c->u_mode.eax.cmac_header);
+ _gcry_cmac_reset (&c->u_mode.eax.cmac_ciphertext);
+
+ /* Calculate nonce CMAC */
+
+ memset(&nonce_cmac, 0, sizeof(nonce_cmac));
+ memset(&initbuf, 0, sizeof(initbuf));
+
+ buf_cpy (&nonce_cmac.subkeys, c->u_mode.eax.cmac_header.subkeys,
+ sizeof(c->u_mode.eax.cmac_header.subkeys));
+
+ err = _gcry_cmac_write (c, &nonce_cmac, initbuf, c->spec->blocksize);
+ if (err != 0)
+ return err;
+
+ if (noncelen != 0)
+ {
+ err = _gcry_cmac_write (c, &nonce_cmac, nonce, noncelen);
+ if (err != 0)
+ return err;
+ }
+
+ err = _gcry_cmac_final (c, &nonce_cmac);
+ if (err != 0)
+ return err;
+
+ cipher_block_cpy (c->u_iv.iv, nonce_cmac.u_iv.iv, MAX_BLOCKSIZE);
+ cipher_block_cpy (c->u_ctr.ctr, nonce_cmac.u_iv.iv, MAX_BLOCKSIZE);
+
+ wipememory (&nonce_cmac, sizeof(nonce_cmac));
+
+ /* Prepare header CMAC */
+
+ initbuf[c->spec->blocksize - 1] = 1;
+ err = _gcry_cmac_write (c, &c->u_mode.eax.cmac_header, initbuf,
+ c->spec->blocksize);
+ if (err != 0)
+ return err;
+
+ /* Prepare ciphertext CMAC */
+
+ initbuf[c->spec->blocksize - 1] = 2;
+ err = _gcry_cmac_write (c, &c->u_mode.eax.cmac_ciphertext, initbuf,
+ c->spec->blocksize);
+ if (err != 0)
+ return err;
+
+ c->marks.iv = 1;
+ c->marks.tag = 0;
+
+ return 0;
+}
+
+
+static gcry_err_code_t
+_gcry_cipher_eax_tag (gcry_cipher_hd_t c,
+ byte *outbuf, size_t outbuflen, int check)
+{
+ gcry_err_code_t err;
+
+ if (!c->marks.tag)
+ {
+ err = _gcry_cmac_final (c, &c->u_mode.eax.cmac_header);
+ if (err != 0)
+ return err;
+
+ err = _gcry_cmac_final (c, &c->u_mode.eax.cmac_ciphertext);
+ if (err != 0)
+ return err;
+
+ cipher_block_xor_1 (c->u_iv.iv, c->u_mode.eax.cmac_header.u_iv.iv,
+ MAX_BLOCKSIZE);
+ cipher_block_xor_1 (c->u_iv.iv, c->u_mode.eax.cmac_ciphertext.u_iv.iv,
+ MAX_BLOCKSIZE);
+
+ _gcry_cmac_reset (&c->u_mode.eax.cmac_header);
+ _gcry_cmac_reset (&c->u_mode.eax.cmac_ciphertext);
+
+ c->marks.tag = 1;
+ }
+
+ if (!check)
+ {
+ if (outbuflen > c->spec->blocksize)
+ outbuflen = c->spec->blocksize;
+
+      /* NB: We already checked that OUTBUF is large enough to hold
+       * the result or has a valid truncated length.  */
+ memcpy (outbuf, c->u_iv.iv, outbuflen);
+ }
+ else
+ {
+      /* OUTBUFLEN gives the length of the user-supplied tag in OUTBUF,
+       * so we need to check its length first.  */
+ if (!(outbuflen <= c->spec->blocksize)
+ || !buf_eq_const (outbuf, c->u_iv.iv, outbuflen))
+ return GPG_ERR_CHECKSUM;
+ }
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_eax_get_tag (gcry_cipher_hd_t c, unsigned char *outtag,
+ size_t taglen)
+{
+ return _gcry_cipher_eax_tag (c, outtag, taglen, 0);
+}
+
+gcry_err_code_t
+_gcry_cipher_eax_check_tag (gcry_cipher_hd_t c, const unsigned char *intag,
+ size_t taglen)
+{
+ return _gcry_cipher_eax_tag (c, (unsigned char *) intag, taglen, 1);
+}
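+
+/* Taken together, the routines above implement EAX as three CMAC (OMAC)
+ * computations with distinct one-block tweaks: the nonce is MACed under
+ * tweak 0 (and doubles as the initial CTR value), the header under tweak 1,
+ * and the ciphertext under tweak 2; the tag is the XOR of the three
+ * results, which is what _gcry_cipher_eax_tag computes.  */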
diff --git a/comm/third_party/libgcrypt/cipher/cipher-gcm-armv7-neon.S b/comm/third_party/libgcrypt/cipher/cipher-gcm-armv7-neon.S
new file mode 100644
index 0000000000..a801a5e57b
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-gcm-armv7-neon.S
@@ -0,0 +1,341 @@
+/* cipher-gcm-armv7-neon.S - ARM/NEON accelerated GHASH
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_NEON)
+
+.syntax unified
+.fpu neon
+.arm
+
+.text
+
+#ifdef __PIC__
+# define GET_DATA_POINTER(reg, name, rtmp) \
+ ldr reg, 1f; \
+ ldr rtmp, 2f; \
+ b 3f; \
+ 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+ 2: .word name(GOT); \
+ 3: add reg, pc, reg; \
+ ldr reg, [reg, rtmp];
+#else
+# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+
+/* Constants */
+
+.align 4
+gcry_gcm_reduction_constant:
+.Lrconst64:
+ .quad 0xc200000000000000
+
+/* Register macros */
+
+#define rhash q0
+#define rhash_l d0
+#define rhash_h d1
+
+#define rh1 q1
+#define rh1_l d2
+#define rh1_h d3
+
+#define rbuf q2
+#define rbuf_l d4
+#define rbuf_h d5
+
+#define rbuf1 q3
+#define rbuf1_l d6
+#define rbuf1_h d7
+
+#define t0q q4
+#define t0l d8
+#define t0h d9
+
+#define t1q q5
+#define t1l d10
+#define t1h d11
+
+#define t2q q6
+#define t2l d12
+#define t2h d13
+
+#define t3q q7
+#define t3l d14
+#define t3h d15
+
+/* q8 */
+#define k16 d16
+#define k32 d17
+
+/* q9 */
+#define k48 d18
+
+#define k0 q10
+
+#define rr0 q11
+#define rr0_l d22
+#define rr0_h d23
+
+#define rr1 q12
+#define rr1_l d24
+#define rr1_h d25
+
+#define rt0 q13
+#define rt0_l d26
+#define rt0_h d27
+
+#define rt1 q14
+#define rt1_l d28
+#define rt1_h d29
+
+#define rrconst q15
+#define rrconst_l d30
+#define rrconst_h d31
+
+/* Macro for 64x64=>128 carry-less multiplication using vmull.p8 instruction.
+ *
+ * From "Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R. Fast Software
+ * Polynomial Multiplication on ARM Processors using the NEON Engine. The
+ * Second International Workshop on Modern Cryptography and Security
+ * Engineering — MoCrySEn, 2013". */
+
+#define vmull_p64(rq, rl, rh, ad, bd) \
+ vext.8 t0l, ad, ad, $1; \
+ vmull.p8 t0q, t0l, bd; \
+ vext.8 rl, bd, bd, $1; \
+ vmull.p8 rq, ad, rl; \
+ vext.8 t1l, ad, ad, $2; \
+ vmull.p8 t1q, t1l, bd; \
+ vext.8 t3l, bd, bd, $2; \
+ vmull.p8 t3q, ad, t3l; \
+ vext.8 t2l, ad, ad, $3; \
+ vmull.p8 t2q, t2l, bd; \
+ veor t0q, t0q, rq; \
+ vext.8 rl, bd, bd, $3; \
+ vmull.p8 rq, ad, rl; \
+ veor t1q, t1q, t3q; \
+ vext.8 t3l, bd, bd, $4; \
+ vmull.p8 t3q, ad, t3l; \
+ veor t0l, t0l, t0h; \
+ vand t0h, t0h, k48; \
+ veor t1l, t1l, t1h; \
+ vand t1h, t1h, k32; \
+ veor t2q, t2q, rq; \
+ veor t0l, t0l, t0h; \
+ veor t1l, t1l, t1h; \
+ veor t2l, t2l, t2h; \
+ vand t2h, t2h, k16; \
+ veor t3l, t3l, t3h; \
+ vmov.i64 t3h, $0; \
+ vext.8 t0q, t0q, t0q, $15; \
+ veor t2l, t2l, t2h; \
+ vext.8 t1q, t1q, t1q, $14; \
+ vmull.p8 rq, ad, bd; \
+ vext.8 t2q, t2q, t2q, $13; \
+ vext.8 t3q, t3q, t3q, $12; \
+ veor t0q, t0q, t1q; \
+ veor t2q, t2q, t3q; \
+ veor rq, rq, t0q; \
+ veor rq, rq, t2q;
+
+/* GHASH macros.
+ *
+ * See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
+ * Cryptology — CT-RSA 2015" for details.
+ */
+
+/* Input: 'a' and 'b', Output: 'r0:r1' (low 128-bits in r0, high in r1)
+ * Note: 'r1' may be 'a' or 'b', 'r0' must not be either 'a' or 'b'.
+ */
+#define PMUL_128x128(r0, r1, a, b, t1, t2, interleave_op) \
+ veor t1##_h, b##_l, b##_h; \
+ veor t1##_l, a##_l, a##_h; \
+ vmull_p64( r0, r0##_l, r0##_h, a##_l, b##_l ); \
+ vmull_p64( r1, r1##_l, r1##_h, a##_h, b##_h ); \
+ vmull_p64( t2, t2##_h, t2##_l, t1##_h, t1##_l ); \
+ interleave_op; \
+ veor t2, r0; \
+ veor t2, r1; \
+ veor r0##_h, t2##_l; \
+ veor r1##_l, t2##_h;
+
+/* Reduction using Xor and Shift.
+ * Input: 'r0:r1', Output: 'a'
+ *
+ * See "Shay Gueron, Michael E. Kounavis. Intel Carry-Less Multiplication
+ * Instruction and its Usage for Computing the GCM Mode" for details.
+ */
+#define REDUCTION(a, r0, r1, t, interleave_op) \
+ vshl.u32 t0q, r0, #31; \
+ vshl.u32 t1q, r0, #30; \
+ vshl.u32 t2q, r0, #25; \
+ veor t0q, t0q, t1q; \
+ veor t0q, t0q, t2q; \
+ vext.8 t, t0q, k0, #4; \
+ vext.8 t0q, k0, t0q, #(16-12); \
+ veor r0, r0, t0q; \
+ interleave_op; \
+ vshr.u32 t0q, r0, #1; \
+ vshr.u32 t1q, r0, #2; \
+ vshr.u32 t2q, r0, #7; \
+ veor t0q, t0q, t1q; \
+ veor t0q, t0q, t2q; \
+ veor t0q, t0q, t; \
+ veor r0, r0, t0q; \
+ veor a, r0, r1;
+
+#define _(...) __VA_ARGS__
+#define __ _()
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) veor reg, reg;
+
+
+/*
+ * unsigned int _gcry_ghash_armv7_neon (void *gcm_key, byte *result,
+ * const byte *buf, size_t nblocks);
+ */
+.align 3
+.globl _gcry_ghash_armv7_neon
+.type _gcry_ghash_armv7_neon,%function;
+_gcry_ghash_armv7_neon:
+ /* input:
+ * r0: gcm_key
+ * r1: result/hash
+ * r2: buf
+ * r3: nblocks
+ */
+ push {r4-r6, lr}
+
+ cmp r3, #0
+ beq .Ldo_nothing
+
+ vpush {q4-q7}
+
+ vld1.64 {rhash}, [r1]
+ vld1.64 {rh1}, [r0]
+
+ vrev64.8 rhash, rhash /* byte-swap */
+
+ vmov.i64 k0, #0x0
+ vmov.i64 k16, #0xffff
+ vmov.i64 k32, #0xffffffff
+ vmov.i64 k48, #0xffffffffffff
+
+ vext.8 rhash, rhash, rhash, #8
+
+ /* Handle remaining blocks. */
+
+ vld1.64 {rbuf}, [r2]!
+ subs r3, r3, #1
+
+ vrev64.8 rbuf, rbuf /* byte-swap */
+ vext.8 rbuf, rbuf, rbuf, #8
+
+ veor rhash, rhash, rbuf
+
+ beq .Lend
+
+.Loop:
+ vld1.64 {rbuf}, [r2]!
+ PMUL_128x128(rr0, rr1, rhash, rh1, rt0, rt1, _(vrev64.8 rbuf, rbuf))
+ REDUCTION(rhash, rr0, rr1, rt0, _(vext.8 rbuf, rbuf, rbuf, #8))
+ subs r3, r3, #1
+ veor rhash, rhash, rbuf
+
+ bne .Loop
+
+.Lend:
+ PMUL_128x128(rr0, rr1, rhash, rh1, rt0, rt1, _(CLEAR_REG(rbuf)))
+ REDUCTION(rhash, rr0, rr1, rt0, _(CLEAR_REG(rh1)))
+
+.Ldone:
+ CLEAR_REG(rr1)
+ vrev64.8 rhash, rhash /* byte-swap */
+ CLEAR_REG(rt0)
+ CLEAR_REG(rr0)
+ vext.8 rhash, rhash, rhash, #8
+ CLEAR_REG(rt1)
+ CLEAR_REG(t0q)
+ CLEAR_REG(t1q)
+ CLEAR_REG(t2q)
+ CLEAR_REG(t3q)
+ vst1.64 {rhash}, [r1]
+ CLEAR_REG(rhash)
+
+ vpop {q4-q7}
+
+.Ldo_nothing:
+ mov r0, #0
+ pop {r4-r6, pc}
+.size _gcry_ghash_armv7_neon,.-_gcry_ghash_armv7_neon;
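+
+/* At C level the loop above computes, for each 16-byte block M_i,
+ *   X <- (X xor M_i) * H   in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1,
+ * with vmull.p8 used to synthesize the 64x64-bit carry-less multiplications
+ * that plain NEON lacks.  */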
+
+
+/*
+ * void _gcry_ghash_setup_armv7_neon (void *gcm_key);
+ */
+.align 3
+.globl _gcry_ghash_setup_armv7_neon
+.type _gcry_ghash_setup_armv7_neon,%function;
+_gcry_ghash_setup_armv7_neon:
+ /* input:
+ * r0: gcm_key
+ */
+
+ vpush {q4-q7}
+
+ GET_DATA_POINTER(r2, .Lrconst64, r3)
+
+ vld1.64 {rrconst_h}, [r2]
+
+#define GCM_LSH_1(r_out, ia, ib, const_d, oa, ob, ma) \
+ /* H <<< 1 */ \
+ vshr.s64 ma, ib, #63; \
+ vshr.u64 oa, ib, #63; \
+ vshr.u64 ob, ia, #63; \
+ vand ma, const_d; \
+ vshl.u64 ib, ib, #1; \
+ vshl.u64 ia, ia, #1; \
+ vorr ob, ib; \
+ vorr oa, ia; \
+ veor ob, ma; \
+ vst1.64 {oa, ob}, [r_out]
+
+ vld1.64 {rhash}, [r0]
+ vrev64.8 rhash, rhash /* byte-swap */
+ vext.8 rhash, rhash, rhash, #8
+
+ vmov rbuf1, rhash
+ GCM_LSH_1(r0, rhash_l, rhash_h, rrconst_h, rh1_l, rh1_h, rt1_l) /* H<<<1 */
+
+ CLEAR_REG(rh1)
+ CLEAR_REG(rhash)
+ CLEAR_REG(rbuf1)
+ CLEAR_REG(rrconst)
+ vpop {q4-q7}
+ bx lr
+.size _gcry_ghash_setup_armv7_neon,.-_gcry_ghash_setup_armv7_neon;
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch32-ce.S b/comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch32-ce.S
new file mode 100644
index 0000000000..1de66a1626
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch32-ce.S
@@ -0,0 +1,433 @@
+/* cipher-gcm-armv8-aarch32-ce.S - ARM/CE accelerated GHASH
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+
+.syntax unified
+.arch armv8-a
+.fpu crypto-neon-fp-armv8
+.arm
+
+.text
+
+#ifdef __PIC__
+# define GET_DATA_POINTER(reg, name, rtmp) \
+ ldr reg, 1f; \
+ ldr rtmp, 2f; \
+ b 3f; \
+ 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+ 2: .word name(GOT); \
+ 3: add reg, pc, reg; \
+ ldr reg, [reg, rtmp];
+#else
+# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+
+/* Constants */
+
+.align 4
+gcry_gcm_reduction_constant:
+.Lrconst64:
+ .quad 0xc200000000000000
+
+
+/* Register macros */
+
+#define rhash q0
+#define rhash_l d0
+#define rhash_h d1
+
+#define rh1 q1
+#define rh1_l d2
+#define rh1_h d3
+
+#define rbuf q2
+#define rbuf_l d4
+#define rbuf_h d5
+
+#define rbuf1 q3
+#define rbuf1_l d6
+#define rbuf1_h d7
+
+#define rbuf2 q4
+#define rbuf2_l d8
+#define rbuf2_h d9
+
+#define rbuf3 q5
+#define rbuf3_l d10
+#define rbuf3_h d11
+
+#define rh2 q6
+#define rh2_l d12
+#define rh2_h d13
+
+#define rh3 q7
+#define rh3_l d14
+#define rh3_h d15
+
+#define rh4 q8
+#define rh4_l d16
+#define rh4_h d17
+
+#define rr2 q9
+#define rr2_l d18
+#define rr2_h d19
+
+#define rr3 q10
+#define rr3_l d20
+#define rr3_h d21
+
+#define rr0 q11
+#define rr0_l d22
+#define rr0_h d23
+
+#define rr1 q12
+#define rr1_l d24
+#define rr1_h d25
+
+#define rt0 q13
+#define rt0_l d26
+#define rt0_h d27
+
+#define rt1 q14
+#define rt1_l d28
+#define rt1_h d29
+
+#define rrconst q15
+#define rrconst_l d30
+#define rrconst_h d31
+
+/* GHASH macros */
+
+/* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
+ * Cryptology — CT-RSA 2015" for details.
+ */
+
+/* Input: 'a' and 'b', Output: 'r0:r1' (low 128-bits in r0, high in r1)
+ * Note: 'r1' may be 'a' or 'b', 'r0' must not be either 'a' or 'b'.
+ */
+#define PMUL_128x128(r0, r1, a, b, t, interleave_op) \
+ veor t##_h, b##_l, b##_h; \
+ veor t##_l, a##_l, a##_h; \
+ vmull.p64 r0, a##_l, b##_l; \
+ vmull.p64 r1, a##_h, b##_h; \
+ vmull.p64 t, t##_h, t##_l; \
+ interleave_op; \
+ veor t, r0; \
+ veor t, r1; \
+ veor r0##_h, t##_l; \
+ veor r1##_l, t##_h;
+
+/* Input: 'aA' and 'bA', Output: 'r0A:r1A' (low 128-bits in r0A, high in r1A)
+ * Note: 'r1A' may be 'aA' or 'bA', 'r0A' must not be either 'aA' or 'bA'.
+ * Input: 'aB' and 'bB', Output: 'r0B:r1B' (low 128-bits in r0B, high in r1B)
+ * Note: 'r1B' may be 'aB' or 'bB', 'r0B' must not be either 'aB' or 'bB'.
+ */
+#define PMUL_128x128_2(r0A, r1A, aA, bA, r0B, r1B, aB, bB, tA, tB, interleave_op) \
+ veor tA##_h, bA##_l, bA##_h; \
+ veor tA##_l, aA##_l, aA##_h; \
+ veor tB##_h, bB##_l, bB##_h; \
+ veor tB##_l, aB##_l, aB##_h; \
+ vmull.p64 r0A, aA##_l, bA##_l; \
+ vmull.p64 r1A, aA##_h, bA##_h; \
+ vmull.p64 tA, tA##_h, tA##_l; \
+ vmull.p64 r0B, aB##_l, bB##_l; \
+ vmull.p64 r1B, aB##_h, bB##_h; \
+ vmull.p64 tB, tB##_h, tB##_l; \
+ interleave_op; \
+ veor tA, r0A; \
+ veor tA, r1A; \
+ veor tB, r0B; \
+ veor tB, r1B; \
+ veor r0A##_h, tA##_l; \
+ veor r1A##_l, tA##_h; \
+ veor r0B##_h, tB##_l; \
+ veor r1B##_l, tB##_h; \
+
+/* Input: 'r0:r1', Output: 'a' */
+#define REDUCTION(a, r0, r1, rconst, t, interleave_op) \
+ vmull.p64 t, r0##_l, rconst; \
+ veor r0##_h, t##_l; \
+ veor r1##_l, t##_h; \
+ interleave_op; \
+ vmull.p64 t, r0##_h, rconst; \
+ veor r1, t; \
+ veor a, r0, r1;
+
+#define _(...) __VA_ARGS__
+#define __ _()
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) veor reg, reg;
+
+
+/*
+ * unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result,
+ * const byte *buf, size_t nblocks,
+ * void *gcm_table);
+ */
+.align 3
+.globl _gcry_ghash_armv8_ce_pmull
+.type _gcry_ghash_armv8_ce_pmull,%function;
+_gcry_ghash_armv8_ce_pmull:
+ /* input:
+ * r0: gcm_key
+ * r1: result/hash
+ * r2: buf
+ * r3: nblocks
+ * %st+0: gcm_table
+ */
+ push {r4-r6, lr}
+
+ cmp r3, #0
+ beq .Ldo_nothing
+
+ GET_DATA_POINTER(r4, .Lrconst64, lr)
+
+ vld1.64 {rhash}, [r1]
+ vld1.64 {rh1}, [r0]
+
+ vrev64.8 rhash, rhash /* byte-swap */
+ vld1.64 {rrconst_h}, [r4]
+ vext.8 rhash, rhash, rhash, #8
+
+ cmp r3, #4
+ blo .Less_than_4
+
+ /* Bulk processing of 4 blocks per loop iteration. */
+
+ ldr r5, [sp, #(4*4)];
+ add r6, r5, #32
+
+ vpush {q4-q7}
+
+ vld1.64 {rh2-rh3}, [r5]
+ vld1.64 {rh4}, [r6]
+
+ vld1.64 {rbuf-rbuf1}, [r2]!
+ sub r3, r3, #4
+ vld1.64 {rbuf2-rbuf3}, [r2]!
+
+ cmp r3, #4
+ vrev64.8 rbuf, rbuf /* byte-swap */
+ vrev64.8 rbuf1, rbuf1 /* byte-swap */
+ vrev64.8 rbuf2, rbuf2 /* byte-swap */
+ vrev64.8 rbuf3, rbuf3 /* byte-swap */
+
+ vext.8 rbuf, rbuf, rbuf, #8
+ vext.8 rbuf1, rbuf1, rbuf1, #8
+ vext.8 rbuf2, rbuf2, rbuf2, #8
+ vext.8 rbuf3, rbuf3, rbuf3, #8
+ veor rhash, rhash, rbuf /* in0 ^ hash */
+
+ blo .Lend_4
+
+.Loop_4:
+ /* (in0 ^ hash) * H⁴ => rr2:rr3 */
+ /* (in1) * H³ => rr0:rr1 */
+ PMUL_128x128_2(rr0, rr1, rbuf1, rh3, rr2, rr3, rhash, rh4, rt1, rt0, __)
+
+ vld1.64 {rbuf-rbuf1}, [r2]!
+ sub r3, r3, #4
+ veor rr0, rr0, rr2
+ veor rr1, rr1, rr3
+
+ /* (in2) * H² => rr2:rr3 */
+ /* (in3) * H¹ => rhash:rbuf3 */
+ PMUL_128x128_2(rr2, rr3, rbuf2, rh2, rhash, rbuf3, rbuf3, rh1, rt0, rt1,
+ _(vrev64.8 rbuf, rbuf))
+
+ vld1.64 {rbuf2}, [r2]!
+
+ vrev64.8 rbuf1, rbuf1
+ veor rr0, rr0, rr2
+ veor rr1, rr1, rr3
+
+ cmp r3, #4
+ vext.8 rbuf, rbuf, rbuf, #8
+ vext.8 rbuf1, rbuf1, rbuf1, #8
+
+ veor rr0, rr0, rhash
+ veor rr1, rr1, rbuf3
+
+ vld1.64 {rbuf3}, [r2]!
+
+ REDUCTION(rhash, rr0, rr1, rrconst_h, rt1,
+ _(vrev64.8 rbuf2, rbuf2;
+ vrev64.8 rbuf3, rbuf3))
+
+ vext.8 rbuf2, rbuf2, rbuf2, #8
+ vext.8 rbuf3, rbuf3, rbuf3, #8
+ veor rhash, rhash, rbuf /* in0 ^ hash */
+
+ bhs .Loop_4
+
+.Lend_4:
+ /* (in0 ^ hash) * H⁴ => rr2:rr3 */
+ /* (in1) * H³ => rr0:rr1 */
+ PMUL_128x128_2(rr0, rr1, rbuf1, rh3, rr2, rr3, rhash, rh4, rt1, rt0, __)
+
+ /* (in2) * H² => rhash:rbuf */
+ /* (in3) * H¹ => rbuf1:rbuf2 */
+ PMUL_128x128_2(rhash, rbuf, rbuf2, rh2, rbuf1, rbuf2, rbuf3, rh1, rt0, rt1,
+ _(veor rr0, rr0, rr2;
+ veor rr1, rr1, rr3))
+
+ veor rr0, rr0, rhash
+ veor rr1, rr1, rbuf
+
+ veor rr0, rr0, rbuf1
+ veor rr1, rr1, rbuf2
+
+ REDUCTION(rhash, rr0, rr1, rrconst_h, rt1,
+ _(CLEAR_REG(rr2);
+ CLEAR_REG(rr3);
+ CLEAR_REG(rbuf1);
+ CLEAR_REG(rbuf2);
+ CLEAR_REG(rbuf3);
+ CLEAR_REG(rh2);
+ CLEAR_REG(rh3);
+ CLEAR_REG(rh4)))
+
+ vpop {q4-q7}
+
+ cmp r3, #0
+ beq .Ldone
+
+.Less_than_4:
+ /* Handle remaining blocks. */
+
+ vld1.64 {rbuf}, [r2]!
+ subs r3, r3, #1
+
+ vrev64.8 rbuf, rbuf /* byte-swap */
+ vext.8 rbuf, rbuf, rbuf, #8
+
+ veor rhash, rhash, rbuf
+
+ beq .Lend
+
+.Loop:
+ vld1.64 {rbuf}, [r2]!
+ subs r3, r3, #1
+ PMUL_128x128(rr0, rr1, rhash, rh1, rt0, _(vrev64.8 rbuf, rbuf))
+ REDUCTION(rhash, rr0, rr1, rrconst_h, rt0, _(vext.8 rbuf, rbuf, rbuf, #8))
+ veor rhash, rhash, rbuf
+
+ bne .Loop
+
+.Lend:
+ PMUL_128x128(rr0, rr1, rhash, rh1, rt0, _(CLEAR_REG(rbuf)))
+ REDUCTION(rhash, rr0, rr1, rrconst_h, rt0, _(CLEAR_REG(rh1)))
+
+.Ldone:
+ CLEAR_REG(rr1)
+ vrev64.8 rhash, rhash /* byte-swap */
+ CLEAR_REG(rt0)
+ CLEAR_REG(rr0)
+ vext.8 rhash, rhash, rhash, #8
+ CLEAR_REG(rt1)
+ vst1.64 {rhash}, [r1]
+ CLEAR_REG(rhash)
+
+.Ldo_nothing:
+ mov r0, #0
+ pop {r4-r6, pc}
+.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull;
+
+
+/*
+ * void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table);
+ */
+.align 3
+.globl _gcry_ghash_setup_armv8_ce_pmull
+.type _gcry_ghash_setup_armv8_ce_pmull,%function;
+_gcry_ghash_setup_armv8_ce_pmull:
+ /* input:
+ * r0: gcm_key
+ * r1: gcm_table
+ */
+
+ vpush {q4-q7}
+
+ GET_DATA_POINTER(r2, .Lrconst64, r3)
+
+ vld1.64 {rrconst_h}, [r2]
+
+#define GCM_LSH_1(r_out, ia, ib, const_d, oa, ob, ma) \
+ /* H <<< 1 */ \
+ vshr.s64 ma, ib, #63; \
+ vshr.u64 oa, ib, #63; \
+ vshr.u64 ob, ia, #63; \
+ vand ma, const_d; \
+ vshl.u64 ib, ib, #1; \
+ vshl.u64 ia, ia, #1; \
+ vorr ob, ib; \
+ vorr oa, ia; \
+ veor ob, ma; \
+ vst1.64 {oa, ob}, [r_out]
+
+ vld1.64 {rhash}, [r0]
+ vrev64.8 rhash, rhash /* byte-swap */
+ vext.8 rhash, rhash, rhash, #8
+
+ vmov rbuf1, rhash
+ GCM_LSH_1(r0, rhash_l, rhash_h, rrconst_h, rh1_l, rh1_h, rt1_l) /* H<<<1 */
+
+ /* H² */
+ PMUL_128x128(rr0, rr1, rbuf1, rh1, rt0, __)
+ REDUCTION(rh2, rr0, rr1, rrconst_h, rt0, __)
+ vmov rhash, rh2
+ GCM_LSH_1(r1, rh2_l, rh2_h, rrconst_h, rbuf1_l, rbuf1_h, rt1_l) /* H²<<<1 */
+ add r1, r1, #16
+
+ /* H³ */
+ PMUL_128x128(rr0, rr1, rhash, rh1, rt1, __)
+ REDUCTION(rh3, rr0, rr1, rrconst_h, rt1, __)
+
+ /* H⁴ */
+ PMUL_128x128(rr0, rr1, rhash, rbuf1, rt0, __)
+ REDUCTION(rh4, rr0, rr1, rrconst_h, rt0, __)
+
+ GCM_LSH_1(r1, rh3_l, rh3_h, rrconst_h, rt0_l, rt0_h, rt1_l) /* H³<<<1 */
+ add r1, r1, #16
+ GCM_LSH_1(r1, rh4_l, rh4_h, rrconst_h, rt0_l, rt0_h, rt1_l) /* H⁴<<<1 */
+
+ CLEAR_REG(rt0)
+ CLEAR_REG(rt1)
+ CLEAR_REG(rr1)
+ CLEAR_REG(rr0)
+ CLEAR_REG(rh1)
+ CLEAR_REG(rh2)
+ CLEAR_REG(rh3)
+ CLEAR_REG(rh4)
+ CLEAR_REG(rhash)
+ CLEAR_REG(rbuf1)
+ CLEAR_REG(rrconst)
+ vpop {q4-q7}
+ bx lr
+.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull;
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch64-ce.S b/comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch64-ce.S
new file mode 100644
index 0000000000..877207d3e5
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch64-ce.S
@@ -0,0 +1,424 @@
+/* cipher-gcm-armv8-aarch64-ce.S - ARM/CE accelerated GHASH
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__) && \
+ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+
+.cpu generic+simd+crypto
+
+.text
+
+
+/* Constants */
+
+.align 4
+gcry_gcm_reduction_constant:
+.Lrconst:
+ .quad 0x87
+
+
+/* Register macros */
+
+#define rhash v0
+#define rr0 v1
+#define rr1 v2
+#define rbuf v3
+#define rbuf1 v4
+#define rbuf2 v5
+#define rbuf3 v6
+#define rbuf4 v7
+#define rbuf5 v8
+#define rr2 v9
+#define rr3 v10
+#define rr4 v11
+#define rr5 v12
+#define rr6 v13
+#define rr7 v14
+#define rr8 v15
+#define rr9 v16
+
+#define rrconst v18
+#define rh1 v19
+#define rh2 v20
+#define rh3 v21
+#define rh4 v22
+#define rh5 v23
+#define rh6 v24
+#define t0 v25
+#define t1 v26
+#define t2 v27
+#define t3 v28
+#define t4 v29
+#define t5 v30
+#define vZZ v31
+
+/* GHASH macros */
+
+/* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
+ * Cryptology — CT-RSA 2015" for details.
+ */
+
+/* Input: 'a' and 'b', Output: 'r0:r1' (low 128-bits in r0, high in r1) */
+#define PMUL_128x128(r0, r1, a, b, T0, T1, interleave_op) \
+ ext T0.16b, b.16b, b.16b, #8; \
+ pmull r0.1q, a.1d, b.1d; \
+ pmull2 r1.1q, a.2d, b.2d; \
+ pmull T1.1q, a.1d, T0.1d; \
+ pmull2 T0.1q, a.2d, T0.2d; \
+ interleave_op; \
+ eor T0.16b, T0.16b, T1.16b; \
+ ext T1.16b, vZZ.16b, T0.16b, #8; \
+ ext T0.16b, T0.16b, vZZ.16b, #8; \
+ eor r0.16b, r0.16b, T1.16b; \
+ eor r1.16b, r1.16b, T0.16b;
+
+/* Input: 'aA' and 'bA', Output: 'r0A:r1A' (low 128-bits in r0A, high in r1A)
+ * Input: 'aB' and 'bB', Output: 'r0B:r1B' (low 128-bits in r0B, high in r1B)
+ * Input: 'aC' and 'bC', Output: 'r0C:r1C' (low 128-bits in r0C, high in r1C)
+ */
+#define PMUL_128x128_3(r0A, r1A, aA, bA, t0A, t1A, \
+ r0B, r1B, aB, bB, t0B, t1B, \
+ r0C, r1C, aC, bC, t0C, t1C, interleave_op) \
+ ext t0A.16b, bA.16b, bA.16b, #8; \
+ pmull r0A.1q, aA.1d, bA.1d; \
+ pmull2 r1A.1q, aA.2d, bA.2d; \
+ ext t0B.16b, bB.16b, bB.16b, #8; \
+ pmull r0B.1q, aB.1d, bB.1d; \
+ pmull2 r1B.1q, aB.2d, bB.2d; \
+ ext t0C.16b, bC.16b, bC.16b, #8; \
+ pmull r0C.1q, aC.1d, bC.1d; \
+ pmull2 r1C.1q, aC.2d, bC.2d; \
+ pmull t1A.1q, aA.1d, t0A.1d; \
+ pmull2 t0A.1q, aA.2d, t0A.2d; \
+ pmull t1B.1q, aB.1d, t0B.1d; \
+ pmull2 t0B.1q, aB.2d, t0B.2d; \
+ pmull t1C.1q, aC.1d, t0C.1d; \
+ pmull2 t0C.1q, aC.2d, t0C.2d; \
+ eor t0A.16b, t0A.16b, t1A.16b; \
+ eor t0B.16b, t0B.16b, t1B.16b; \
+ eor t0C.16b, t0C.16b, t1C.16b; \
+ interleave_op; \
+ ext t1A.16b, vZZ.16b, t0A.16b, #8; \
+ ext t0A.16b, t0A.16b, vZZ.16b, #8; \
+ ext t1B.16b, vZZ.16b, t0B.16b, #8; \
+ ext t0B.16b, t0B.16b, vZZ.16b, #8; \
+ ext t1C.16b, vZZ.16b, t0C.16b, #8; \
+ ext t0C.16b, t0C.16b, vZZ.16b, #8; \
+ eor r0A.16b, r0A.16b, t1A.16b; \
+ eor r1A.16b, r1A.16b, t0A.16b; \
+ eor r0B.16b, r0B.16b, t1B.16b; \
+ eor r1B.16b, r1B.16b, t0B.16b; \
+ eor r0C.16b, r0C.16b, t1C.16b; \
+ eor r1C.16b, r1C.16b, t0C.16b; \
+
+/* Input: 'r0:r1', Output: 'a' */
+#define REDUCTION(a, r0, r1, rconst, T0, T1, interleave_op1, interleave_op2, \
+ interleave_op3) \
+ pmull2 T0.1q, r1.2d, rconst.2d; \
+ interleave_op1; \
+ ext T1.16b, T0.16b, vZZ.16b, #8; \
+ ext T0.16b, vZZ.16b, T0.16b, #8; \
+ interleave_op2; \
+ eor r1.16b, r1.16b, T1.16b; \
+ eor r0.16b, r0.16b, T0.16b; \
+ pmull T0.1q, r1.1d, rconst.1d; \
+ interleave_op3; \
+ eor a.16b, r0.16b, T0.16b;
+
+/* Other functional macros */
+
+#define _(...) __VA_ARGS__
+#define __ _()
+
+#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b;
+
+#define VPUSH_ABI \
+ stp d8, d9, [sp, #-16]!; \
+ CFI_ADJUST_CFA_OFFSET(16); \
+ stp d10, d11, [sp, #-16]!; \
+ CFI_ADJUST_CFA_OFFSET(16); \
+ stp d12, d13, [sp, #-16]!; \
+ CFI_ADJUST_CFA_OFFSET(16); \
+ stp d14, d15, [sp, #-16]!; \
+ CFI_ADJUST_CFA_OFFSET(16);
+
+#define VPOP_ABI \
+ ldp d14, d15, [sp], #16; \
+ CFI_ADJUST_CFA_OFFSET(-16); \
+ ldp d12, d13, [sp], #16; \
+ CFI_ADJUST_CFA_OFFSET(-16); \
+ ldp d10, d11, [sp], #16; \
+ CFI_ADJUST_CFA_OFFSET(-16); \
+ ldp d8, d9, [sp], #16; \
+ CFI_ADJUST_CFA_OFFSET(-16);
+
+/*
+ * unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result,
+ * const byte *buf, size_t nblocks,
+ * void *gcm_table);
+ */
+.align 3
+.globl _gcry_ghash_armv8_ce_pmull
+ELF(.type _gcry_ghash_armv8_ce_pmull,%function;)
+_gcry_ghash_armv8_ce_pmull:
+ /* input:
+ * x0: gcm_key
+ * x1: result/hash
+ * x2: buf
+ * x3: nblocks
+ * x4: gcm_table
+ */
+ CFI_STARTPROC();
+
+ cbz x3, .Ldo_nothing;
+
+ GET_DATA_POINTER(x5, .Lrconst)
+
+ eor vZZ.16b, vZZ.16b, vZZ.16b
+ ld1 {rhash.16b}, [x1]
+ ld1 {rh1.16b}, [x0]
+
+ rbit rhash.16b, rhash.16b /* bit-swap */
+ ld1r {rrconst.2d}, [x5]
+
+ cmp x3, #6
+ b.lo .Less_than_6
+
+ add x6, x4, #64
+ VPUSH_ABI
+
+ ld1 {rh2.16b-rh5.16b}, [x4]
+ ld1 {rh6.16b}, [x6]
+
+ sub x3, x3, #6
+
+ ld1 {rbuf.16b-rbuf2.16b}, [x2], #(3*16)
+ ld1 {rbuf3.16b-rbuf5.16b}, [x2], #(3*16)
+
+ rbit rbuf.16b, rbuf.16b /* bit-swap */
+ rbit rbuf1.16b, rbuf1.16b /* bit-swap */
+ rbit rbuf2.16b, rbuf2.16b /* bit-swap */
+ rbit rbuf3.16b, rbuf3.16b /* bit-swap */
+ rbit rbuf4.16b, rbuf4.16b /* bit-swap */
+ rbit rbuf5.16b, rbuf5.16b /* bit-swap */
+ eor rhash.16b, rhash.16b, rbuf.16b
+
+ cmp x3, #6
+ b.lo .Lend_6
+
+.Loop_6:
+
+ /* (in1) * H⁵ => rr0:rr1 */
+ /* (in2) * H⁴ => rr2:rr3 */
+ /* (in0 ^ hash) * H⁶ => rr4:rr5 */
+ PMUL_128x128_3(rr0, rr1, rbuf1, rh5, t0, t1,
+ rr2, rr3, rbuf2, rh4, t2, t3,
+ rr4, rr5, rhash, rh6, t4, t5,
+ _(sub x3, x3, #6))
+
+ ld1 {rbuf.16b-rbuf2.16b}, [x2], #(3*16)
+ cmp x3, #6
+
+ eor rr0.16b, rr0.16b, rr2.16b
+ eor rr1.16b, rr1.16b, rr3.16b
+
+ /* (in3) * H³ => rr2:rr3 */
+ /* (in4) * H² => rr6:rr7 */
+ /* (in5) * H¹ => rr8:rr9 */
+ PMUL_128x128_3(rr2, rr3, rbuf3, rh3, t0, t1,
+ rr6, rr7, rbuf4, rh2, t2, t3,
+ rr8, rr9, rbuf5, rh1, t4, t5,
+ _(eor rr0.16b, rr0.16b, rr4.16b;
+ eor rr1.16b, rr1.16b, rr5.16b))
+
+ eor rr0.16b, rr0.16b, rr2.16b
+ eor rr1.16b, rr1.16b, rr3.16b
+ rbit rbuf.16b, rbuf.16b
+ eor rr0.16b, rr0.16b, rr6.16b
+ eor rr1.16b, rr1.16b, rr7.16b
+ rbit rbuf1.16b, rbuf1.16b
+ eor rr0.16b, rr0.16b, rr8.16b
+ eor rr1.16b, rr1.16b, rr9.16b
+ ld1 {rbuf3.16b-rbuf5.16b}, [x2], #(3*16)
+
+ REDUCTION(rhash, rr0, rr1, rrconst, t0, t1,
+ _(rbit rbuf2.16b, rbuf2.16b),
+ _(rbit rbuf3.16b, rbuf3.16b),
+ _(rbit rbuf4.16b, rbuf4.16b))
+
+ rbit rbuf5.16b, rbuf5.16b
+ eor rhash.16b, rhash.16b, rbuf.16b
+
+ b.hs .Loop_6
+
+.Lend_6:
+
+ /* (in1) * H⁵ => rr0:rr1 */
+ /* (in0 ^ hash) * H⁶ => rr2:rr3 */
+ /* (in2) * H⁴ => rr4:rr5 */
+ PMUL_128x128_3(rr0, rr1, rbuf1, rh5, t0, t1,
+ rr2, rr3, rhash, rh6, t2, t3,
+ rr4, rr5, rbuf2, rh4, t4, t5,
+ __)
+ eor rr0.16b, rr0.16b, rr2.16b
+ eor rr1.16b, rr1.16b, rr3.16b
+ eor rr0.16b, rr0.16b, rr4.16b
+ eor rr1.16b, rr1.16b, rr5.16b
+
+ /* (in3) * H³ => rhash:rbuf */
+ /* (in4) * H² => rr6:rr7 */
+ /* (in5) * H¹ => rr8:rr9 */
+ PMUL_128x128_3(rhash, rbuf, rbuf3, rh3, t0, t1,
+ rr6, rr7, rbuf4, rh2, t2, t3,
+ rr8, rr9, rbuf5, rh1, t4, t5,
+ _(CLEAR_REG(rh4);
+ CLEAR_REG(rh5);
+ CLEAR_REG(rh6)))
+ eor rr0.16b, rr0.16b, rhash.16b
+ eor rr1.16b, rr1.16b, rbuf.16b
+ eor rr0.16b, rr0.16b, rr6.16b
+ eor rr1.16b, rr1.16b, rr7.16b
+ eor rr0.16b, rr0.16b, rr8.16b
+ eor rr1.16b, rr1.16b, rr9.16b
+
+ REDUCTION(rhash, rr0, rr1, rrconst, t0, t1,
+ _(CLEAR_REG(rh2);
+ CLEAR_REG(rh3);
+ CLEAR_REG(rr2);
+ CLEAR_REG(rbuf2);
+ CLEAR_REG(rbuf3)),
+ _(CLEAR_REG(rr3);
+ CLEAR_REG(rr4);
+ CLEAR_REG(rr5);
+ CLEAR_REG(rr6);
+ CLEAR_REG(rr7)),
+ _(CLEAR_REG(rr8);
+ CLEAR_REG(rr9);
+ CLEAR_REG(rbuf1);
+ CLEAR_REG(rbuf2)))
+
+ CLEAR_REG(rbuf4)
+ CLEAR_REG(rbuf5)
+ CLEAR_REG(t2)
+ CLEAR_REG(t3)
+ CLEAR_REG(t4)
+ CLEAR_REG(t5)
+
+ VPOP_ABI
+
+ cbz x3, .Ldone
+
+.Less_than_6:
+ /* Handle remaining blocks. */
+
+ ld1 {rbuf.16b}, [x2], #16
+ sub x3, x3, #1
+
+ rbit rbuf.16b, rbuf.16b /* bit-swap */
+
+ eor rhash.16b, rhash.16b, rbuf.16b
+
+ cbz x3, .Lend
+
+.Loop:
+ PMUL_128x128(rr0, rr1, rh1, rhash, t0, t1, _(ld1 {rbuf.16b}, [x2], #16))
+ REDUCTION(rhash, rr0, rr1, rrconst, t0, t1,
+ _(sub x3, x3, #1),
+ _(rbit rbuf.16b, rbuf.16b),
+ __)
+ eor rhash.16b, rhash.16b, rbuf.16b
+
+ cbnz x3, .Loop
+
+.Lend:
+ PMUL_128x128(rr0, rr1, rh1, rhash, t0, t1, _(CLEAR_REG(rbuf)))
+ REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, __, _(CLEAR_REG(rh1)), __)
+
+.Ldone:
+ CLEAR_REG(rr1)
+ CLEAR_REG(rr0)
+ rbit rhash.16b, rhash.16b /* bit-swap */
+ CLEAR_REG(t0)
+ CLEAR_REG(t1)
+
+ st1 {rhash.2d}, [x1]
+ CLEAR_REG(rhash)
+
+.Ldo_nothing:
+ mov x0, #0
+ ret
+ CFI_ENDPROC()
+ELF(.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull;)
+
+
+/*
+ * void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table);
+ */
+.align 3
+.globl _gcry_ghash_setup_armv8_ce_pmull
+ELF(.type _gcry_ghash_setup_armv8_ce_pmull,%function;)
+_gcry_ghash_setup_armv8_ce_pmull:
+ /* input:
+ * x0: gcm_key
+ * x1: gcm_table
+ */
+ CFI_STARTPROC()
+
+ GET_DATA_POINTER(x2, .Lrconst)
+
+ eor vZZ.16b, vZZ.16b, vZZ.16b
+
+ /* H¹ */
+ ld1 {rh1.16b}, [x0]
+ rbit rh1.16b, rh1.16b
+ st1 {rh1.16b}, [x0]
+
+ ld1r {rrconst.2d}, [x2]
+
+ /* H² */
+ PMUL_128x128(rr0, rr1, rh1, rh1, t0, t1, __)
+ REDUCTION(rh2, rr0, rr1, rrconst, t0, t1, __, __, __)
+
+ /* H³ */
+ PMUL_128x128(rr0, rr1, rh2, rh1, t0, t1, __)
+ REDUCTION(rh3, rr0, rr1, rrconst, t0, t1, __, __, __)
+
+ /* H⁴ */
+ PMUL_128x128(rr0, rr1, rh2, rh2, t0, t1, __)
+ REDUCTION(rh4, rr0, rr1, rrconst, t0, t1, __, __, __)
+
+ /* H⁵ */
+ PMUL_128x128(rr0, rr1, rh2, rh3, t0, t1, __)
+ REDUCTION(rh5, rr0, rr1, rrconst, t0, t1, __, __, __)
+
+ /* H⁶ */
+ PMUL_128x128(rr0, rr1, rh3, rh3, t0, t1, __)
+ REDUCTION(rh6, rr0, rr1, rrconst, t0, t1, __, __, __)
+
+ st1 {rh2.16b-rh4.16b}, [x1], #(3*16)
+ st1 {rh5.16b-rh6.16b}, [x1]
+
+ ret
+ CFI_ENDPROC()
+ELF(.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull;)
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/cipher-gcm-intel-pclmul.c b/comm/third_party/libgcrypt/cipher/cipher-gcm-intel-pclmul.c
new file mode 100644
index 0000000000..28165c653f
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-gcm-intel-pclmul.c
@@ -0,0 +1,712 @@
+/* cipher-gcm-intel-pclmul.c - Intel PCLMUL accelerated Galois Counter Mode
+ * implementation
+ * Copyright (C) 2013-2014,2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+#ifdef GCM_USE_INTEL_PCLMUL
+
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+# pragma GCC target("no-sse")
+#endif
+#if __clang__
+# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function)
+#endif
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+
+
+/*
+ Intel PCLMUL ghash based on white paper:
+ "Intel® Carry-Less Multiplication Instruction and its Usage for Computing the
+ GCM Mode - Rev 2.01"; Shay Gueron, Michael E. Kounavis.
+ */
+static ASM_FUNC_ATTR_INLINE void reduction(void)
+{
+ /* input: <xmm1:xmm3> */
+
+ asm volatile (/* first phase of the reduction */
+ "movdqa %%xmm3, %%xmm6\n\t"
+ "movdqa %%xmm3, %%xmm5\n\t"
+ "psllq $1, %%xmm6\n\t" /* packed right shifting << 63 */
+ "pxor %%xmm3, %%xmm6\n\t"
+ "psllq $57, %%xmm5\n\t" /* packed right shifting << 57 */
+ "psllq $62, %%xmm6\n\t" /* packed right shifting << 62 */
+ "pxor %%xmm5, %%xmm6\n\t" /* xor the shifted versions */
+ "pshufd $0x6a, %%xmm6, %%xmm5\n\t"
+ "pshufd $0xae, %%xmm6, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm3\n\t" /* first phase of the reduction
+ complete */
+
+ /* second phase of the reduction */
+ "pxor %%xmm3, %%xmm1\n\t" /* xor the shifted versions */
+ "psrlq $1, %%xmm3\n\t" /* packed left shifting >> 1 */
+ "pxor %%xmm3, %%xmm6\n\t"
+ "psrlq $1, %%xmm3\n\t" /* packed left shifting >> 2 */
+ "pxor %%xmm3, %%xmm1\n\t"
+ "psrlq $5, %%xmm3\n\t" /* packed left shifting >> 7 */
+ "pxor %%xmm3, %%xmm6\n\t"
+ "pxor %%xmm6, %%xmm1\n\t" /* the result is in xmm1 */
+ ::: "memory" );
+}
+
+static ASM_FUNC_ATTR_INLINE void gfmul_pclmul(void)
+{
+ /* Input: XMM0 and XMM1, Output: XMM1. Input XMM0 stays unmodified.
+ Input must be converted to little-endian.
+ */
+ asm volatile (/* gfmul, xmm0 has operator a and xmm1 has operator b. */
+ "pshufd $78, %%xmm0, %%xmm2\n\t"
+ "pshufd $78, %%xmm1, %%xmm4\n\t"
+ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 holds a0+a1 */
+ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds b0+b1 */
+
+ "movdqa %%xmm0, %%xmm3\n\t"
+ "pclmulqdq $0, %%xmm1, %%xmm3\n\t" /* xmm3 holds a0*b0 */
+ "pclmulqdq $17, %%xmm0, %%xmm1\n\t" /* xmm6 holds a1*b1 */
+ "movdqa %%xmm3, %%xmm5\n\t"
+ "pclmulqdq $0, %%xmm2, %%xmm4\n\t" /* xmm4 holds (a0+a1)*(b0+b1) */
+
+ "pxor %%xmm1, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */
+ "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "psrldq $8, %%xmm4\n\t"
+ "pslldq $8, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "pxor %%xmm4, %%xmm1\n\t" /* <xmm1:xmm3> holds the result of the
+ carry-less multiplication of xmm0
+ by xmm1 */
+ ::: "memory" );
+
+ reduction();
+}
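
The inline asm above is the usual three-multiplication Karatsuba split of a 128x128-bit carry-less multiply (a0*b0, a1*b1 and (a0^a1)*(b0^b1), recombined into a 256-bit product). A minimal, unoptimized C sketch of the same split is shown below for reference; u128s, clmul64() and clmul128() are illustrative names invented here and are not part of libgcrypt.

#include <stdint.h>

typedef struct { uint64_t lo, hi; } u128s;

/* Slow bitwise 64x64 -> 128-bit carry-less multiply (reference only). */
static u128s clmul64 (uint64_t a, uint64_t b)
{
  u128s r = { 0, 0 };
  int i;
  for (i = 0; i < 64; i++)
    if ((b >> i) & 1)
      {
        r.lo ^= a << i;
        r.hi ^= i ? (a >> (64 - i)) : 0;
      }
  return r;
}

/* 128x128 -> 256-bit carry-less multiply using three 64-bit multiplies:
 * (a1*x^64 + a0)*(b1*x^64 + b0)
 *   = a1b1*x^128 + ((a0^a1)(b0^b1) ^ a0b0 ^ a1b1)*x^64 + a0b0        */
static void clmul128 (u128s *r_lo, u128s *r_hi,
                      uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
  u128s p0 = clmul64 (a0, b0);            /* a0*b0 */
  u128s p1 = clmul64 (a1, b1);            /* a1*b1 */
  u128s pm = clmul64 (a0 ^ a1, b0 ^ b1);  /* (a0+a1)*(b0+b1) */
  uint64_t mid_lo = pm.lo ^ p0.lo ^ p1.lo;
  uint64_t mid_hi = pm.hi ^ p0.hi ^ p1.hi;

  r_lo->lo = p0.lo;                       /* bits   0..63  */
  r_lo->hi = p0.hi ^ mid_lo;              /* bits  64..127 */
  r_hi->lo = p1.lo ^ mid_hi;              /* bits 128..191 */
  r_hi->hi = p1.hi;                       /* bits 192..255 */
}
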
+
+static ASM_FUNC_ATTR_INLINE void
+gfmul_pclmul_aggr4(const void *buf, const void *h_1, const void *h_table,
+ const unsigned char *be_mask)
+{
+ /* Input:
+ Hash: XMM1
+ Output:
+ Hash: XMM1
+ */
+ asm volatile (/* perform clmul and merge results... */
+ "movdqu 2*16(%[h_table]), %%xmm2\n\t" /* Load H4 */
+ "movdqu 0*16(%[buf]), %%xmm5\n\t"
+ "pshufb %[be_mask], %%xmm5\n\t" /* be => le */
+ "pxor %%xmm5, %%xmm1\n\t"
+
+ "pshufd $78, %%xmm2, %%xmm5\n\t"
+ "pshufd $78, %%xmm1, %%xmm4\n\t"
+ "pxor %%xmm2, %%xmm5\n\t" /* xmm5 holds 4:a0+a1 */
+ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds 4:b0+b1 */
+ "movdqa %%xmm2, %%xmm3\n\t"
+ "pclmulqdq $0, %%xmm1, %%xmm3\n\t" /* xmm3 holds 4:a0*b0 */
+ "pclmulqdq $17, %%xmm2, %%xmm1\n\t" /* xmm1 holds 4:a1*b1 */
+ "pclmulqdq $0, %%xmm5, %%xmm4\n\t" /* xmm4 holds 4:(a0+a1)*(b0+b1) */
+
+ "movdqu 1*16(%[h_table]), %%xmm5\n\t" /* Load H3 */
+ "movdqu 1*16(%[buf]), %%xmm2\n\t"
+ "pshufb %[be_mask], %%xmm2\n\t" /* be => le */
+
+ "pshufd $78, %%xmm5, %%xmm0\n\t"
+ "pshufd $78, %%xmm2, %%xmm7\n\t"
+ "pxor %%xmm5, %%xmm0\n\t" /* xmm0 holds 3:a0+a1 */
+ "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 3:b0+b1 */
+ "movdqa %%xmm5, %%xmm6\n\t"
+ "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 3:a0*b0 */
+ "pclmulqdq $17, %%xmm5, %%xmm2\n\t" /* xmm2 holds 3:a1*b1 */
+ "pclmulqdq $0, %%xmm0, %%xmm7\n\t" /* xmm7 holds 3:(a0+a1)*(b0+b1) */
+
+ "movdqu 2*16(%[buf]), %%xmm5\n\t"
+ "pshufb %[be_mask], %%xmm5\n\t" /* be => le */
+
+ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 3+4:a0*b0 */
+ "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 3+4:a1*b1 */
+ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 3+4:(a0+a1)*(b0+b1) */
+
+ "movdqu 0*16(%[h_table]), %%xmm2\n\t" /* Load H2 */
+
+ "pshufd $78, %%xmm2, %%xmm0\n\t"
+ "pshufd $78, %%xmm5, %%xmm7\n\t"
+ "pxor %%xmm2, %%xmm0\n\t" /* xmm0 holds 2:a0+a1 */
+ "pxor %%xmm5, %%xmm7\n\t" /* xmm7 holds 2:b0+b1 */
+ "movdqa %%xmm2, %%xmm6\n\t"
+ "pclmulqdq $0, %%xmm5, %%xmm6\n\t" /* xmm6 holds 2:a0*b0 */
+ "pclmulqdq $17, %%xmm2, %%xmm5\n\t" /* xmm5 holds 2:a1*b1 */
+ "pclmulqdq $0, %%xmm0, %%xmm7\n\t" /* xmm7 holds 2:(a0+a1)*(b0+b1) */
+
+ "movdqu 3*16(%[buf]), %%xmm2\n\t"
+ "pshufb %[be_mask], %%xmm2\n\t" /* be => le */
+ :
+ : [buf] "r" (buf),
+ [h_table] "r" (h_table),
+ [be_mask] "m" (*be_mask)
+ : "memory" );
+
+ asm volatile ("pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 2+3+4:a0*b0 */
+ "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 2+3+4:a1*b1 */
+ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 2+3+4:(a0+a1)*(b0+b1) */
+
+ "movdqu %[h_1], %%xmm5\n\t" /* Load H1 */
+
+ "pshufd $78, %%xmm5, %%xmm0\n\t"
+ "pshufd $78, %%xmm2, %%xmm7\n\t"
+ "pxor %%xmm5, %%xmm0\n\t" /* xmm0 holds 1:a0+a1 */
+ "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 1:b0+b1 */
+ "movdqa %%xmm5, %%xmm6\n\t"
+ "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 1:a0*b0 */
+ "pclmulqdq $17, %%xmm5, %%xmm2\n\t" /* xmm2 holds 1:a1*b1 */
+ "pclmulqdq $0, %%xmm0, %%xmm7\n\t" /* xmm7 holds 1:(a0+a1)*(b0+b1) */
+
+ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 1+2+3+4:a0*b0 */
+ "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 1+2+3+4:a1*b1 */
+ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 1+2+3+4:(a0+a1)*(b0+b1) */
+
+ /* aggregated reduction... */
+ "movdqa %%xmm3, %%xmm5\n\t"
+ "pxor %%xmm1, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */
+ "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "psrldq $8, %%xmm4\n\t"
+ "pslldq $8, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "pxor %%xmm4, %%xmm1\n\t" /* <xmm1:xmm3> holds the result of the
+ carry-less multiplication of xmm0
+ by xmm1 */
+ :
+ : [h_1] "m" (*(const unsigned char *)h_1)
+ : "memory" );
+
+ reduction();
+}
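
The 4-block aggregation above is the standard rearrangement of the GHASH recurrence: with running hash Y and input blocks C1..C4,

  Y' = ((((Y ^ C1)·H ^ C2)·H ^ C3)·H ^ C4)·H
     = (Y ^ C1)·H⁴ ^ C2·H³ ^ C3·H² ^ C4·H¹

so the four carry-less products can be computed independently against H⁴, H³, H², H¹ (the higher powers cached in gcm_table), and only one reduction is needed per four blocks.
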
+
+#ifdef __x86_64__
+static ASM_FUNC_ATTR_INLINE void
+gfmul_pclmul_aggr8(const void *buf, const void *h_table)
+{
+ /* Input:
+ H¹: XMM0
+ bemask: XMM15
+ Hash: XMM1
+ Output:
+ Hash: XMM1
+ Inputs XMM0 and XMM15 stay unmodified.
+ */
+ asm volatile (/* Load H6, H7, H8. */
+ "movdqu 6*16(%[h_table]), %%xmm10\n\t"
+ "movdqu 5*16(%[h_table]), %%xmm9\n\t"
+ "movdqu 4*16(%[h_table]), %%xmm8\n\t"
+
+ /* perform clmul and merge results... */
+ "movdqu 0*16(%[buf]), %%xmm5\n\t"
+ "movdqu 1*16(%[buf]), %%xmm2\n\t"
+ "pshufb %%xmm15, %%xmm5\n\t" /* be => le */
+ "pshufb %%xmm15, %%xmm2\n\t" /* be => le */
+ "pxor %%xmm5, %%xmm1\n\t"
+
+ "pshufd $78, %%xmm10, %%xmm5\n\t"
+ "pshufd $78, %%xmm1, %%xmm4\n\t"
+ "pxor %%xmm10, %%xmm5\n\t" /* xmm5 holds 8:a0+a1 */
+ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds 8:b0+b1 */
+ "movdqa %%xmm10, %%xmm3\n\t"
+ "pclmulqdq $0, %%xmm1, %%xmm3\n\t" /* xmm3 holds 8:a0*b0 */
+ "pclmulqdq $17, %%xmm10, %%xmm1\n\t" /* xmm1 holds 8:a1*b1 */
+ "pclmulqdq $0, %%xmm5, %%xmm4\n\t" /* xmm4 holds 8:(a0+a1)*(b0+b1) */
+
+ "pshufd $78, %%xmm9, %%xmm11\n\t"
+ "pshufd $78, %%xmm2, %%xmm7\n\t"
+ "pxor %%xmm9, %%xmm11\n\t" /* xmm11 holds 7:a0+a1 */
+ "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 7:b0+b1 */
+ "movdqa %%xmm9, %%xmm6\n\t"
+ "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 7:a0*b0 */
+ "pclmulqdq $17, %%xmm9, %%xmm2\n\t" /* xmm2 holds 7:a1*b1 */
+ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 7:(a0+a1)*(b0+b1) */
+
+ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 7+8:a0*b0 */
+ "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 7+8:a1*b1 */
+ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 7+8:(a0+a1)*(b0+b1) */
+
+ "movdqu 2*16(%[buf]), %%xmm5\n\t"
+ "movdqu 3*16(%[buf]), %%xmm2\n\t"
+ "pshufb %%xmm15, %%xmm5\n\t" /* be => le */
+ "pshufb %%xmm15, %%xmm2\n\t" /* be => le */
+
+ "pshufd $78, %%xmm8, %%xmm11\n\t"
+ "pshufd $78, %%xmm5, %%xmm7\n\t"
+ "pxor %%xmm8, %%xmm11\n\t" /* xmm11 holds 6:a0+a1 */
+ "pxor %%xmm5, %%xmm7\n\t" /* xmm7 holds 6:b0+b1 */
+ "movdqa %%xmm8, %%xmm6\n\t"
+ "pclmulqdq $0, %%xmm5, %%xmm6\n\t" /* xmm6 holds 6:a0*b0 */
+ "pclmulqdq $17, %%xmm8, %%xmm5\n\t" /* xmm5 holds 6:a1*b1 */
+ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 6:(a0+a1)*(b0+b1) */
+
+ /* Load H3, H4, H5. */
+ "movdqu 3*16(%[h_table]), %%xmm10\n\t"
+ "movdqu 2*16(%[h_table]), %%xmm9\n\t"
+ "movdqu 1*16(%[h_table]), %%xmm8\n\t"
+
+ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 6+7+8:a0*b0 */
+ "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 6+7+8:a1*b1 */
+ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 6+7+8:(a0+a1)*(b0+b1) */
+
+ "pshufd $78, %%xmm10, %%xmm11\n\t"
+ "pshufd $78, %%xmm2, %%xmm7\n\t"
+ "pxor %%xmm10, %%xmm11\n\t" /* xmm11 holds 5:a0+a1 */
+ "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 5:b0+b1 */
+ "movdqa %%xmm10, %%xmm6\n\t"
+ "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 5:a0*b0 */
+ "pclmulqdq $17, %%xmm10, %%xmm2\n\t" /* xmm2 holds 5:a1*b1 */
+ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 5:(a0+a1)*(b0+b1) */
+
+ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 5+6+7+8:a0*b0 */
+ "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 5+6+7+8:a1*b1 */
+ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 5+6+7+8:(a0+a1)*(b0+b1) */
+
+ "movdqu 4*16(%[buf]), %%xmm5\n\t"
+ "movdqu 5*16(%[buf]), %%xmm2\n\t"
+ "pshufb %%xmm15, %%xmm5\n\t" /* be => le */
+ "pshufb %%xmm15, %%xmm2\n\t" /* be => le */
+
+ "pshufd $78, %%xmm9, %%xmm11\n\t"
+ "pshufd $78, %%xmm5, %%xmm7\n\t"
+ "pxor %%xmm9, %%xmm11\n\t" /* xmm11 holds 4:a0+a1 */
+ "pxor %%xmm5, %%xmm7\n\t" /* xmm7 holds 4:b0+b1 */
+ "movdqa %%xmm9, %%xmm6\n\t"
+ "pclmulqdq $0, %%xmm5, %%xmm6\n\t" /* xmm6 holds 4:a0*b0 */
+ "pclmulqdq $17, %%xmm9, %%xmm5\n\t" /* xmm5 holds 4:a1*b1 */
+ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 4:(a0+a1)*(b0+b1) */
+
+ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 4+5+6+7+8:a0*b0 */
+ "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 4+5+6+7+8:a1*b1 */
+ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 4+5+6+7+8:(a0+a1)*(b0+b1) */
+
+ "pshufd $78, %%xmm8, %%xmm11\n\t"
+ "pshufd $78, %%xmm2, %%xmm7\n\t"
+ "pxor %%xmm8, %%xmm11\n\t" /* xmm11 holds 3:a0+a1 */
+ "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 3:b0+b1 */
+ "movdqa %%xmm8, %%xmm6\n\t"
+ "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 3:a0*b0 */
+ "pclmulqdq $17, %%xmm8, %%xmm2\n\t" /* xmm2 holds 3:a1*b1 */
+ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 3:(a0+a1)*(b0+b1) */
+
+ "movdqu 0*16(%[h_table]), %%xmm8\n\t" /* Load H2 */
+
+ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 3+4+5+6+7+8:a0*b0 */
+ "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 3+4+5+6+7+8:a1*b1 */
+ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 3+4+5+6+7+8:(a0+a1)*(b0+b1) */
+
+ "movdqu 6*16(%[buf]), %%xmm5\n\t"
+ "movdqu 7*16(%[buf]), %%xmm2\n\t"
+ "pshufb %%xmm15, %%xmm5\n\t" /* be => le */
+ "pshufb %%xmm15, %%xmm2\n\t" /* be => le */
+
+ "pshufd $78, %%xmm8, %%xmm11\n\t"
+ "pshufd $78, %%xmm5, %%xmm7\n\t"
+ "pxor %%xmm8, %%xmm11\n\t" /* xmm11 holds 4:a0+a1 */
+ "pxor %%xmm5, %%xmm7\n\t" /* xmm7 holds 4:b0+b1 */
+ "movdqa %%xmm8, %%xmm6\n\t"
+ "pclmulqdq $0, %%xmm5, %%xmm6\n\t" /* xmm6 holds 4:a0*b0 */
+ "pclmulqdq $17, %%xmm8, %%xmm5\n\t" /* xmm5 holds 4:a1*b1 */
+ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 4:(a0+a1)*(b0+b1) */
+
+ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 2+3+4+5+6+7+8:a0*b0 */
+ "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 2+3+4+5+6+7+8:a1*b1 */
+ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 2+3+4+5+6+7+8:(a0+a1)*(b0+b1) */
+
+ "pshufd $78, %%xmm0, %%xmm11\n\t"
+ "pshufd $78, %%xmm2, %%xmm7\n\t"
+ "pxor %%xmm0, %%xmm11\n\t" /* xmm11 holds 3:a0+a1 */
+ "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 3:b0+b1 */
+ "movdqa %%xmm0, %%xmm6\n\t"
+ "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 3:a0*b0 */
+ "pclmulqdq $17, %%xmm0, %%xmm2\n\t" /* xmm2 holds 3:a1*b1 */
+ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 3:(a0+a1)*(b0+b1) */
+
+ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 1+2+3+3+4+5+6+7+8:a0*b0 */
+ "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 1+2+3+3+4+5+6+7+8:a1*b1 */
+ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 1+2+3+3+4+5+6+7+8:(a0+a1)*(b0+b1) */
+
+ /* aggregated reduction... */
+ "movdqa %%xmm3, %%xmm5\n\t"
+ "pxor %%xmm1, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */
+ "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "psrldq $8, %%xmm4\n\t"
+ "pslldq $8, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "pxor %%xmm4, %%xmm1\n\t" /* <xmm1:xmm3> holds the result of the
+ carry-less multiplication of xmm0
+ by xmm1 */
+ :
+ : [buf] "r" (buf),
+ [h_table] "r" (h_table)
+ : "memory" );
+
+ reduction();
+}
+#endif
+
+static ASM_FUNC_ATTR_INLINE void gcm_lsh(void *h, unsigned int hoffs)
+{
+ static const u64 pconst[2] __attribute__ ((aligned (16))) =
+ { U64_C(0x0000000000000001), U64_C(0xc200000000000000) };
+
+ asm volatile ("movdqu (%[h]), %%xmm2\n\t"
+ "pshufd $0xff, %%xmm2, %%xmm3\n\t"
+ "movdqa %%xmm2, %%xmm4\n\t"
+ "psrad $31, %%xmm3\n\t"
+ "pslldq $8, %%xmm4\n\t"
+ "pand %[pconst], %%xmm3\n\t"
+ "paddq %%xmm2, %%xmm2\n\t"
+ "psrlq $63, %%xmm4\n\t"
+ "pxor %%xmm3, %%xmm2\n\t"
+ "pxor %%xmm4, %%xmm2\n\t"
+ "movdqu %%xmm2, (%[h])\n\t"
+ :
+ : [pconst] "m" (pconst),
+ [h] "r" ((byte *)h + hoffs)
+ : "memory" );
+}
+
+void ASM_FUNC_ATTR
+_gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
+{
+ static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+#if defined(__x86_64__) && defined(__WIN64__)
+ char win64tmp[10 * 16];
+
+ /* XMM6-XMM15 need to be restored after use. */
+ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
+ "movdqu %%xmm7, 1*16(%0)\n\t"
+ "movdqu %%xmm8, 2*16(%0)\n\t"
+ "movdqu %%xmm9, 3*16(%0)\n\t"
+ "movdqu %%xmm10, 4*16(%0)\n\t"
+ "movdqu %%xmm11, 5*16(%0)\n\t"
+ "movdqu %%xmm12, 6*16(%0)\n\t"
+ "movdqu %%xmm13, 7*16(%0)\n\t"
+ "movdqu %%xmm14, 8*16(%0)\n\t"
+ "movdqu %%xmm15, 9*16(%0)\n\t"
+ :
+ : "r" (win64tmp)
+ : "memory" );
+#endif
+
+ /* Swap endianness of hsub. */
+ asm volatile ("movdqu (%[key]), %%xmm0\n\t"
+ "pshufb %[be_mask], %%xmm0\n\t"
+ "movdqu %%xmm0, (%[key])\n\t"
+ :
+ : [key] "r" (c->u_mode.gcm.u_ghash_key.key),
+ [be_mask] "m" (*be_mask)
+ : "memory");
+
+ gcm_lsh(c->u_mode.gcm.u_ghash_key.key, 0); /* H <<< 1 */
+
+ asm volatile ("movdqa %%xmm0, %%xmm1\n\t"
+ "movdqu (%[key]), %%xmm0\n\t" /* load H <<< 1 */
+ :
+ : [key] "r" (c->u_mode.gcm.u_ghash_key.key)
+ : "memory");
+
+ gfmul_pclmul (); /* H<<<1•H => H² */
+
+ asm volatile ("movdqu %%xmm1, 0*16(%[h_table])\n\t"
+ "movdqa %%xmm1, %%xmm7\n\t"
+ :
+ : [h_table] "r" (c->u_mode.gcm.gcm_table)
+ : "memory");
+
+ gcm_lsh(c->u_mode.gcm.gcm_table, 0 * 16); /* H² <<< 1 */
+ gfmul_pclmul (); /* H<<<1•H² => H³ */
+
+ asm volatile ("movdqa %%xmm7, %%xmm0\n\t"
+ "movdqu %%xmm1, 1*16(%[h_table])\n\t"
+ "movdqu 0*16(%[h_table]), %%xmm1\n\t" /* load H² <<< 1 */
+ :
+ : [h_table] "r" (c->u_mode.gcm.gcm_table)
+ : "memory");
+
+ gfmul_pclmul (); /* H²<<<1•H² => H⁴ */
+
+ asm volatile ("movdqu %%xmm1, 2*16(%[h_table])\n\t"
+ "movdqa %%xmm1, %%xmm0\n\t"
+ "movdqu (%[key]), %%xmm1\n\t" /* load H <<< 1 */
+ :
+ : [h_table] "r" (c->u_mode.gcm.gcm_table),
+ [key] "r" (c->u_mode.gcm.u_ghash_key.key)
+ : "memory");
+
+ gcm_lsh(c->u_mode.gcm.gcm_table, 1 * 16); /* H³ <<< 1 */
+ gcm_lsh(c->u_mode.gcm.gcm_table, 2 * 16); /* H⁴ <<< 1 */
+
+#ifdef __x86_64__
+ gfmul_pclmul (); /* H<<<1•H⁴ => H⁵ */
+
+ asm volatile ("movdqu %%xmm1, 3*16(%[h_table])\n\t"
+ "movdqu 0*16(%[h_table]), %%xmm1\n\t" /* load H² <<< 1 */
+ :
+ : [h_table] "r" (c->u_mode.gcm.gcm_table)
+ : "memory");
+
+ gfmul_pclmul (); /* H²<<<1•H⁴ => H⁶ */
+
+ asm volatile ("movdqu %%xmm1, 4*16(%[h_table])\n\t"
+ "movdqu 1*16(%[h_table]), %%xmm1\n\t" /* load H³ <<< 1 */
+ :
+ : [h_table] "r" (c->u_mode.gcm.gcm_table)
+ : "memory");
+
+ gfmul_pclmul (); /* H³<<<1•H⁴ => H⁷ */
+
+ asm volatile ("movdqu %%xmm1, 5*16(%[h_table])\n\t"
+ "movdqu 2*16(%[h_table]), %%xmm1\n\t" /* load H⁴ <<< 1 */
+ :
+ : [h_table] "r" (c->u_mode.gcm.gcm_table)
+ : "memory");
+
+ gfmul_pclmul (); /* H⁴<<<1•H⁴ => H⁸ */
+
+ asm volatile ("movdqu %%xmm1, 6*16(%[h_table])\n\t"
+ :
+ : [h_table] "r" (c->u_mode.gcm.gcm_table)
+ : "memory");
+
+ gcm_lsh(c->u_mode.gcm.gcm_table, 3 * 16); /* H⁵ <<< 1 */
+ gcm_lsh(c->u_mode.gcm.gcm_table, 4 * 16); /* H⁶ <<< 1 */
+ gcm_lsh(c->u_mode.gcm.gcm_table, 5 * 16); /* H⁷ <<< 1 */
+ gcm_lsh(c->u_mode.gcm.gcm_table, 6 * 16); /* H⁸ <<< 1 */
+
+#ifdef __WIN64__
+ /* Clear/restore used registers. */
+ asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm2\n\t"
+ "pxor %%xmm3, %%xmm3\n\t"
+ "pxor %%xmm4, %%xmm4\n\t"
+ "pxor %%xmm5, %%xmm5\n\t"
+ "movdqu 0*16(%0), %%xmm6\n\t"
+ "movdqu 1*16(%0), %%xmm7\n\t"
+ "movdqu 2*16(%0), %%xmm8\n\t"
+ "movdqu 3*16(%0), %%xmm9\n\t"
+ "movdqu 4*16(%0), %%xmm10\n\t"
+ "movdqu 5*16(%0), %%xmm11\n\t"
+ "movdqu 6*16(%0), %%xmm12\n\t"
+ "movdqu 7*16(%0), %%xmm13\n\t"
+ "movdqu 8*16(%0), %%xmm14\n\t"
+ "movdqu 9*16(%0), %%xmm15\n\t"
+ :
+ : "r" (win64tmp)
+ : "memory" );
+#else
+ /* Clear used registers. */
+ asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm2\n\t"
+ "pxor %%xmm3, %%xmm3\n\t"
+ "pxor %%xmm4, %%xmm4\n\t"
+ "pxor %%xmm5, %%xmm5\n\t"
+ "pxor %%xmm6, %%xmm6\n\t"
+ "pxor %%xmm7, %%xmm7\n\t"
+ "pxor %%xmm8, %%xmm8\n\t"
+ "pxor %%xmm9, %%xmm9\n\t"
+ "pxor %%xmm10, %%xmm10\n\t"
+ "pxor %%xmm11, %%xmm11\n\t"
+ "pxor %%xmm12, %%xmm12\n\t"
+ "pxor %%xmm13, %%xmm13\n\t"
+ "pxor %%xmm14, %%xmm14\n\t"
+ "pxor %%xmm15, %%xmm15\n\t"
+ ::: "memory" );
+#endif
+#endif
+}
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
+ size_t nblocks)
+{
+ static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+ const unsigned int blocksize = GCRY_GCM_BLOCK_LEN;
+#if defined(__x86_64__) && defined(__WIN64__)
+ char win64tmp[10 * 16];
+#endif
+
+ if (nblocks == 0)
+ return 0;
+
+#if defined(__x86_64__) && defined(__WIN64__)
+ /* XMM6-XMM15 need to be restored after use. */
+ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
+ "movdqu %%xmm7, 1*16(%0)\n\t"
+ "movdqu %%xmm8, 2*16(%0)\n\t"
+ "movdqu %%xmm9, 3*16(%0)\n\t"
+ "movdqu %%xmm10, 4*16(%0)\n\t"
+ "movdqu %%xmm11, 5*16(%0)\n\t"
+ "movdqu %%xmm12, 6*16(%0)\n\t"
+ "movdqu %%xmm13, 7*16(%0)\n\t"
+ "movdqu %%xmm14, 8*16(%0)\n\t"
+ "movdqu %%xmm15, 9*16(%0)\n\t"
+ :
+ : "r" (win64tmp)
+ : "memory" );
+#endif
+
+ /* Preload hash. */
+ asm volatile ("movdqa %[be_mask], %%xmm7\n\t"
+ "movdqu %[hash], %%xmm1\n\t"
+ "pshufb %%xmm7, %%xmm1\n\t" /* be => le */
+ :
+ : [hash] "m" (*result),
+ [be_mask] "m" (*be_mask)
+ : "memory" );
+
+#ifdef __x86_64__
+ if (nblocks >= 8)
+ {
+ /* Preload H1. */
+ asm volatile ("movdqa %%xmm7, %%xmm15\n\t"
+ "movdqa %[h_1], %%xmm0\n\t"
+ :
+ : [h_1] "m" (*c->u_mode.gcm.u_ghash_key.key)
+ : "memory" );
+
+ while (nblocks >= 8)
+ {
+ gfmul_pclmul_aggr8 (buf, c->u_mode.gcm.gcm_table);
+
+ buf += 8 * blocksize;
+ nblocks -= 8;
+ }
+#ifndef __WIN64__
+ /* Clear used x86-64/XMM registers. */
+ asm volatile( "pxor %%xmm8, %%xmm8\n\t"
+ "pxor %%xmm9, %%xmm9\n\t"
+ "pxor %%xmm10, %%xmm10\n\t"
+ "pxor %%xmm11, %%xmm11\n\t"
+ "pxor %%xmm12, %%xmm12\n\t"
+ "pxor %%xmm13, %%xmm13\n\t"
+ "pxor %%xmm14, %%xmm14\n\t"
+ "pxor %%xmm15, %%xmm15\n\t"
+ ::: "memory" );
+#endif
+ }
+#endif
+
+ while (nblocks >= 4)
+ {
+ gfmul_pclmul_aggr4 (buf, c->u_mode.gcm.u_ghash_key.key,
+ c->u_mode.gcm.gcm_table, be_mask);
+
+ buf += 4 * blocksize;
+ nblocks -= 4;
+ }
+
+ if (nblocks)
+ {
+ /* Preload H1. */
+ asm volatile ("movdqa %[h_1], %%xmm0\n\t"
+ :
+ : [h_1] "m" (*c->u_mode.gcm.u_ghash_key.key)
+ : "memory" );
+
+ while (nblocks)
+ {
+ asm volatile ("movdqu %[buf], %%xmm2\n\t"
+ "pshufb %[be_mask], %%xmm2\n\t" /* be => le */
+ "pxor %%xmm2, %%xmm1\n\t"
+ :
+ : [buf] "m" (*buf), [be_mask] "m" (*be_mask)
+ : "memory" );
+
+ gfmul_pclmul ();
+
+ buf += blocksize;
+ nblocks--;
+ }
+ }
+
+ /* Store hash. */
+ asm volatile ("pshufb %[be_mask], %%xmm1\n\t" /* be => le */
+ "movdqu %%xmm1, %[hash]\n\t"
+ : [hash] "=m" (*result)
+ : [be_mask] "m" (*be_mask)
+ : "memory" );
+
+#if defined(__x86_64__) && defined(__WIN64__)
+ /* Clear/restore used registers. */
+ asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm2\n\t"
+ "pxor %%xmm3, %%xmm3\n\t"
+ "pxor %%xmm4, %%xmm4\n\t"
+ "pxor %%xmm5, %%xmm5\n\t"
+ "movdqu 0*16(%0), %%xmm6\n\t"
+ "movdqu 1*16(%0), %%xmm7\n\t"
+ "movdqu 2*16(%0), %%xmm8\n\t"
+ "movdqu 3*16(%0), %%xmm9\n\t"
+ "movdqu 4*16(%0), %%xmm10\n\t"
+ "movdqu 5*16(%0), %%xmm11\n\t"
+ "movdqu 6*16(%0), %%xmm12\n\t"
+ "movdqu 7*16(%0), %%xmm13\n\t"
+ "movdqu 8*16(%0), %%xmm14\n\t"
+ "movdqu 9*16(%0), %%xmm15\n\t"
+ :
+ : "r" (win64tmp)
+ : "memory" );
+#else
+ /* Clear used registers. */
+ asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm2\n\t"
+ "pxor %%xmm3, %%xmm3\n\t"
+ "pxor %%xmm4, %%xmm4\n\t"
+ "pxor %%xmm5, %%xmm5\n\t"
+ "pxor %%xmm6, %%xmm6\n\t"
+ "pxor %%xmm7, %%xmm7\n\t"
+ ::: "memory" );
+#endif
+
+ return 0;
+}
+
+#if __clang__
+# pragma clang attribute pop
+#endif
+
+#endif /* GCM_USE_INTEL_PCLMUL */
diff --git a/comm/third_party/libgcrypt/cipher/cipher-gcm.c b/comm/third_party/libgcrypt/cipher/cipher-gcm.c
new file mode 100644
index 0000000000..7aad12776f
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-gcm.c
@@ -0,0 +1,1207 @@
+/* cipher-gcm.c - Generic Galois Counter Mode implementation
+ * Copyright (C) 2013 Dmitry Eremin-Solenikov
+ * Copyright (C) 2013, 2018-2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+/* Helper macro to force alignment to 64 bytes. */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_64 __attribute__ ((aligned (64)))
+#else
+# define ATTR_ALIGNED_64
+#endif
+
+
+#ifdef GCM_USE_INTEL_PCLMUL
+extern void _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c);
+
+extern unsigned int _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result,
+ const byte *buf, size_t nblocks);
+#endif
+
+#ifdef GCM_USE_ARM_PMULL
+extern void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table);
+
+extern unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result,
+ const byte *buf, size_t nblocks,
+ void *gcm_table);
+
+static void
+ghash_setup_armv8_ce_pmull (gcry_cipher_hd_t c)
+{
+ _gcry_ghash_setup_armv8_ce_pmull(c->u_mode.gcm.u_ghash_key.key,
+ c->u_mode.gcm.gcm_table);
+}
+
+static unsigned int
+ghash_armv8_ce_pmull (gcry_cipher_hd_t c, byte *result, const byte *buf,
+ size_t nblocks)
+{
+ return _gcry_ghash_armv8_ce_pmull(c->u_mode.gcm.u_ghash_key.key, result, buf,
+ nblocks, c->u_mode.gcm.gcm_table);
+}
+#endif /* GCM_USE_ARM_PMULL */
+
+#ifdef GCM_USE_ARM_NEON
+extern void _gcry_ghash_setup_armv7_neon (void *gcm_key);
+
+extern unsigned int _gcry_ghash_armv7_neon (void *gcm_key, byte *result,
+ const byte *buf, size_t nblocks);
+
+static void
+ghash_setup_armv7_neon (gcry_cipher_hd_t c)
+{
+ _gcry_ghash_setup_armv7_neon(c->u_mode.gcm.u_ghash_key.key);
+}
+
+static unsigned int
+ghash_armv7_neon (gcry_cipher_hd_t c, byte *result, const byte *buf,
+ size_t nblocks)
+{
+ return _gcry_ghash_armv7_neon(c->u_mode.gcm.u_ghash_key.key, result, buf,
+ nblocks);
+}
+#endif /* GCM_USE_ARM_NEON */
+
+#ifdef GCM_USE_S390X_CRYPTO
+#include "asm-inline-s390x.h"
+
+static unsigned int
+ghash_s390x_kimd (gcry_cipher_hd_t c, byte *result, const byte *buf,
+ size_t nblocks)
+{
+ u128_t params[2];
+
+ memcpy (&params[0], result, 16);
+ memcpy (&params[1], c->u_mode.gcm.u_ghash_key.key, 16);
+
+ kimd_execute (KMID_FUNCTION_GHASH, &params, buf, nblocks * 16);
+
+ memcpy (result, &params[0], 16);
+ wipememory (params, sizeof(params));
+ return 0;
+}
+#endif /* GCM_USE_S390X_CRYPTO */
+
+
+#ifdef GCM_USE_TABLES
+static struct
+{
+ volatile u32 counter_head;
+ u32 cacheline_align[64 / 4 - 1];
+ u16 R[256];
+ volatile u32 counter_tail;
+} gcm_table ATTR_ALIGNED_64 =
+ {
+ 0,
+ { 0, },
+ {
+ 0x0000, 0x01c2, 0x0384, 0x0246, 0x0708, 0x06ca, 0x048c, 0x054e,
+ 0x0e10, 0x0fd2, 0x0d94, 0x0c56, 0x0918, 0x08da, 0x0a9c, 0x0b5e,
+ 0x1c20, 0x1de2, 0x1fa4, 0x1e66, 0x1b28, 0x1aea, 0x18ac, 0x196e,
+ 0x1230, 0x13f2, 0x11b4, 0x1076, 0x1538, 0x14fa, 0x16bc, 0x177e,
+ 0x3840, 0x3982, 0x3bc4, 0x3a06, 0x3f48, 0x3e8a, 0x3ccc, 0x3d0e,
+ 0x3650, 0x3792, 0x35d4, 0x3416, 0x3158, 0x309a, 0x32dc, 0x331e,
+ 0x2460, 0x25a2, 0x27e4, 0x2626, 0x2368, 0x22aa, 0x20ec, 0x212e,
+ 0x2a70, 0x2bb2, 0x29f4, 0x2836, 0x2d78, 0x2cba, 0x2efc, 0x2f3e,
+ 0x7080, 0x7142, 0x7304, 0x72c6, 0x7788, 0x764a, 0x740c, 0x75ce,
+ 0x7e90, 0x7f52, 0x7d14, 0x7cd6, 0x7998, 0x785a, 0x7a1c, 0x7bde,
+ 0x6ca0, 0x6d62, 0x6f24, 0x6ee6, 0x6ba8, 0x6a6a, 0x682c, 0x69ee,
+ 0x62b0, 0x6372, 0x6134, 0x60f6, 0x65b8, 0x647a, 0x663c, 0x67fe,
+ 0x48c0, 0x4902, 0x4b44, 0x4a86, 0x4fc8, 0x4e0a, 0x4c4c, 0x4d8e,
+ 0x46d0, 0x4712, 0x4554, 0x4496, 0x41d8, 0x401a, 0x425c, 0x439e,
+ 0x54e0, 0x5522, 0x5764, 0x56a6, 0x53e8, 0x522a, 0x506c, 0x51ae,
+ 0x5af0, 0x5b32, 0x5974, 0x58b6, 0x5df8, 0x5c3a, 0x5e7c, 0x5fbe,
+ 0xe100, 0xe0c2, 0xe284, 0xe346, 0xe608, 0xe7ca, 0xe58c, 0xe44e,
+ 0xef10, 0xeed2, 0xec94, 0xed56, 0xe818, 0xe9da, 0xeb9c, 0xea5e,
+ 0xfd20, 0xfce2, 0xfea4, 0xff66, 0xfa28, 0xfbea, 0xf9ac, 0xf86e,
+ 0xf330, 0xf2f2, 0xf0b4, 0xf176, 0xf438, 0xf5fa, 0xf7bc, 0xf67e,
+ 0xd940, 0xd882, 0xdac4, 0xdb06, 0xde48, 0xdf8a, 0xddcc, 0xdc0e,
+ 0xd750, 0xd692, 0xd4d4, 0xd516, 0xd058, 0xd19a, 0xd3dc, 0xd21e,
+ 0xc560, 0xc4a2, 0xc6e4, 0xc726, 0xc268, 0xc3aa, 0xc1ec, 0xc02e,
+ 0xcb70, 0xcab2, 0xc8f4, 0xc936, 0xcc78, 0xcdba, 0xcffc, 0xce3e,
+ 0x9180, 0x9042, 0x9204, 0x93c6, 0x9688, 0x974a, 0x950c, 0x94ce,
+ 0x9f90, 0x9e52, 0x9c14, 0x9dd6, 0x9898, 0x995a, 0x9b1c, 0x9ade,
+ 0x8da0, 0x8c62, 0x8e24, 0x8fe6, 0x8aa8, 0x8b6a, 0x892c, 0x88ee,
+ 0x83b0, 0x8272, 0x8034, 0x81f6, 0x84b8, 0x857a, 0x873c, 0x86fe,
+ 0xa9c0, 0xa802, 0xaa44, 0xab86, 0xaec8, 0xaf0a, 0xad4c, 0xac8e,
+ 0xa7d0, 0xa612, 0xa454, 0xa596, 0xa0d8, 0xa11a, 0xa35c, 0xa29e,
+ 0xb5e0, 0xb422, 0xb664, 0xb7a6, 0xb2e8, 0xb32a, 0xb16c, 0xb0ae,
+ 0xbbf0, 0xba32, 0xb874, 0xb9b6, 0xbcf8, 0xbd3a, 0xbf7c, 0xbebe,
+ },
+ 0
+ };
+
+#define gcmR gcm_table.R
+
+static inline
+void prefetch_table(const void *tab, size_t len)
+{
+ const volatile byte *vtab = tab;
+ size_t i;
+
+ for (i = 0; len - i >= 8 * 32; i += 8 * 32)
+ {
+ (void)vtab[i + 0 * 32];
+ (void)vtab[i + 1 * 32];
+ (void)vtab[i + 2 * 32];
+ (void)vtab[i + 3 * 32];
+ (void)vtab[i + 4 * 32];
+ (void)vtab[i + 5 * 32];
+ (void)vtab[i + 6 * 32];
+ (void)vtab[i + 7 * 32];
+ }
+ for (; i < len; i += 32)
+ {
+ (void)vtab[i];
+ }
+
+ (void)vtab[len - 1];
+}
+
+static inline void
+do_prefetch_tables (const void *gcmM, size_t gcmM_size)
+{
+ /* Modify the counters to trigger copy-on-write and unsharing if the
+ * physical pages of the look-up table are shared between processes.
+ * Modifying the counters also causes the page checksums to change, which
+ * hints to the same-page merging algorithm that these pages change often. */
+ gcm_table.counter_head++;
+ gcm_table.counter_tail++;
+
+ /* Prefetch look-up tables to cache. */
+ prefetch_table(gcmM, gcmM_size);
+ prefetch_table(&gcm_table, sizeof(gcm_table));
+}
+
+#ifdef GCM_TABLES_USE_U64
+static void
+bshift (u64 * b0, u64 * b1)
+{
+ u64 t[2], mask;
+
+ t[0] = *b0;
+ t[1] = *b1;
+ mask = -(t[1] & 1) & 0xe1;
+ mask <<= 56;
+
+ *b1 = (t[1] >> 1) ^ (t[0] << 63);
+ *b0 = (t[0] >> 1) ^ mask;
+}
+
+static void
+do_fillM (unsigned char *h, u64 *M)
+{
+ int i, j;
+
+ M[0 + 0] = 0;
+ M[0 + 16] = 0;
+
+ M[8 + 0] = buf_get_be64 (h + 0);
+ M[8 + 16] = buf_get_be64 (h + 8);
+
+ for (i = 4; i > 0; i /= 2)
+ {
+ M[i + 0] = M[2 * i + 0];
+ M[i + 16] = M[2 * i + 16];
+
+ bshift (&M[i], &M[i + 16]);
+ }
+
+ for (i = 2; i < 16; i *= 2)
+ for (j = 1; j < i; j++)
+ {
+ M[(i + j) + 0] = M[i + 0] ^ M[j + 0];
+ M[(i + j) + 16] = M[i + 16] ^ M[j + 16];
+ }
+
+ for (i = 0; i < 16; i++)
+ {
+ M[i + 32] = (M[i + 0] >> 4) ^ ((u64) gcmR[(M[i + 16] & 0xf) << 4] << 48);
+ M[i + 48] = (M[i + 16] >> 4) ^ (M[i + 0] << 60);
+ }
+}
+
+static inline unsigned int
+do_ghash (unsigned char *result, const unsigned char *buf, const u64 *gcmM)
+{
+ u64 V[2];
+ u64 tmp[2];
+ const u64 *M;
+ u64 T;
+ u32 A;
+ int i;
+
+ cipher_block_xor (V, result, buf, 16);
+ V[0] = be_bswap64 (V[0]);
+ V[1] = be_bswap64 (V[1]);
+
+ /* First round can be manually tweaked based on the fact that 'tmp' is zero. */
+ M = &gcmM[(V[1] & 0xf) + 32];
+ V[1] >>= 4;
+ tmp[0] = M[0];
+ tmp[1] = M[16];
+ tmp[0] ^= gcmM[(V[1] & 0xf) + 0];
+ tmp[1] ^= gcmM[(V[1] & 0xf) + 16];
+ V[1] >>= 4;
+
+ i = 6;
+ while (1)
+ {
+ M = &gcmM[(V[1] & 0xf) + 32];
+ V[1] >>= 4;
+
+ A = tmp[1] & 0xff;
+ T = tmp[0];
+ tmp[0] = (T >> 8) ^ ((u64) gcmR[A] << 48) ^ gcmM[(V[1] & 0xf) + 0];
+ tmp[1] = (T << 56) ^ (tmp[1] >> 8) ^ gcmM[(V[1] & 0xf) + 16];
+
+ tmp[0] ^= M[0];
+ tmp[1] ^= M[16];
+
+ if (i == 0)
+ break;
+
+ V[1] >>= 4;
+ --i;
+ }
+
+ i = 7;
+ while (1)
+ {
+ M = &gcmM[(V[0] & 0xf) + 32];
+ V[0] >>= 4;
+
+ A = tmp[1] & 0xff;
+ T = tmp[0];
+ tmp[0] = (T >> 8) ^ ((u64) gcmR[A] << 48) ^ gcmM[(V[0] & 0xf) + 0];
+ tmp[1] = (T << 56) ^ (tmp[1] >> 8) ^ gcmM[(V[0] & 0xf) + 16];
+
+ tmp[0] ^= M[0];
+ tmp[1] ^= M[16];
+
+ if (i == 0)
+ break;
+
+ V[0] >>= 4;
+ --i;
+ }
+
+ buf_put_be64 (result + 0, tmp[0]);
+ buf_put_be64 (result + 8, tmp[1]);
+
+ return (sizeof(V) + sizeof(T) + sizeof(tmp) +
+ sizeof(int)*2 + sizeof(void*)*5);
+}
+
+#else /*!GCM_TABLES_USE_U64*/
+
+static void
+bshift (u32 * M, int i)
+{
+ u32 t[4], mask;
+
+ t[0] = M[i * 4 + 0];
+ t[1] = M[i * 4 + 1];
+ t[2] = M[i * 4 + 2];
+ t[3] = M[i * 4 + 3];
+ mask = -(t[3] & 1) & 0xe1;
+
+ M[i * 4 + 3] = (t[3] >> 1) ^ (t[2] << 31);
+ M[i * 4 + 2] = (t[2] >> 1) ^ (t[1] << 31);
+ M[i * 4 + 1] = (t[1] >> 1) ^ (t[0] << 31);
+ M[i * 4 + 0] = (t[0] >> 1) ^ (mask << 24);
+}
+
+static void
+do_fillM (unsigned char *h, u32 *M)
+{
+ int i, j;
+
+ M[0 * 4 + 0] = 0;
+ M[0 * 4 + 1] = 0;
+ M[0 * 4 + 2] = 0;
+ M[0 * 4 + 3] = 0;
+
+ M[8 * 4 + 0] = buf_get_be32 (h + 0);
+ M[8 * 4 + 1] = buf_get_be32 (h + 4);
+ M[8 * 4 + 2] = buf_get_be32 (h + 8);
+ M[8 * 4 + 3] = buf_get_be32 (h + 12);
+
+ for (i = 4; i > 0; i /= 2)
+ {
+ M[i * 4 + 0] = M[2 * i * 4 + 0];
+ M[i * 4 + 1] = M[2 * i * 4 + 1];
+ M[i * 4 + 2] = M[2 * i * 4 + 2];
+ M[i * 4 + 3] = M[2 * i * 4 + 3];
+
+ bshift (M, i);
+ }
+
+ for (i = 2; i < 16; i *= 2)
+ for (j = 1; j < i; j++)
+ {
+ M[(i + j) * 4 + 0] = M[i * 4 + 0] ^ M[j * 4 + 0];
+ M[(i + j) * 4 + 1] = M[i * 4 + 1] ^ M[j * 4 + 1];
+ M[(i + j) * 4 + 2] = M[i * 4 + 2] ^ M[j * 4 + 2];
+ M[(i + j) * 4 + 3] = M[i * 4 + 3] ^ M[j * 4 + 3];
+ }
+
+ for (i = 0; i < 4 * 16; i += 4)
+ {
+ M[i + 0 + 64] = (M[i + 0] >> 4)
+ ^ ((u64) gcmR[(M[i + 3] << 4) & 0xf0] << 16);
+ M[i + 1 + 64] = (M[i + 1] >> 4) ^ (M[i + 0] << 28);
+ M[i + 2 + 64] = (M[i + 2] >> 4) ^ (M[i + 1] << 28);
+ M[i + 3 + 64] = (M[i + 3] >> 4) ^ (M[i + 2] << 28);
+ }
+}
+
+static inline unsigned int
+do_ghash (unsigned char *result, const unsigned char *buf, const u32 *gcmM)
+{
+ byte V[16];
+ u32 tmp[4];
+ u32 v;
+ const u32 *M, *m;
+ u32 T[3];
+ int i;
+
+ cipher_block_xor (V, result, buf, 16); /* V is big-endian */
+
+ /* First round can be manually tweaked based on the fact that 'tmp' is zero. */
+ i = 15;
+
+ v = V[i];
+ M = &gcmM[(v & 0xf) * 4 + 64];
+ v = (v & 0xf0) >> 4;
+ m = &gcmM[v * 4];
+ v = V[--i];
+
+ tmp[0] = M[0] ^ m[0];
+ tmp[1] = M[1] ^ m[1];
+ tmp[2] = M[2] ^ m[2];
+ tmp[3] = M[3] ^ m[3];
+
+ while (1)
+ {
+ M = &gcmM[(v & 0xf) * 4 + 64];
+ v = (v & 0xf0) >> 4;
+ m = &gcmM[v * 4];
+
+ T[0] = tmp[0];
+ T[1] = tmp[1];
+ T[2] = tmp[2];
+ tmp[0] = (T[0] >> 8) ^ ((u32) gcmR[tmp[3] & 0xff] << 16) ^ m[0];
+ tmp[1] = (T[0] << 24) ^ (tmp[1] >> 8) ^ m[1];
+ tmp[2] = (T[1] << 24) ^ (tmp[2] >> 8) ^ m[2];
+ tmp[3] = (T[2] << 24) ^ (tmp[3] >> 8) ^ m[3];
+
+ tmp[0] ^= M[0];
+ tmp[1] ^= M[1];
+ tmp[2] ^= M[2];
+ tmp[3] ^= M[3];
+
+ if (i == 0)
+ break;
+
+ v = V[--i];
+ }
+
+ buf_put_be32 (result + 0, tmp[0]);
+ buf_put_be32 (result + 4, tmp[1]);
+ buf_put_be32 (result + 8, tmp[2]);
+ buf_put_be32 (result + 12, tmp[3]);
+
+ return (sizeof(V) + sizeof(T) + sizeof(tmp) +
+ sizeof(int)*2 + sizeof(void*)*6);
+}
+#endif /*!GCM_TABLES_USE_U64*/
+
+#define fillM(c) \
+ do_fillM (c->u_mode.gcm.u_ghash_key.key, c->u_mode.gcm.gcm_table)
+#define GHASH(c, result, buf) do_ghash (result, buf, c->u_mode.gcm.gcm_table)
+#define prefetch_tables(c) \
+ do_prefetch_tables(c->u_mode.gcm.gcm_table, sizeof(c->u_mode.gcm.gcm_table))
+
+#else
+
+static unsigned long
+bshift (unsigned long *b)
+{
+ unsigned long c;
+ int i;
+ c = b[3] & 1;
+ for (i = 3; i > 0; i--)
+ {
+ b[i] = (b[i] >> 1) | (b[i - 1] << 31);
+ }
+ b[i] >>= 1;
+ return c;
+}
+
+static unsigned int
+do_ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf)
+{
+ unsigned long V[4];
+ int i, j;
+ byte *p;
+
+#ifdef WORDS_BIGENDIAN
+ p = result;
+#else
+ unsigned long T[4];
+
+ cipher_block_xor (V, result, buf, 16);
+ for (i = 0; i < 4; i++)
+ {
+ V[i] = (V[i] & 0x00ff00ff) << 8 | (V[i] & 0xff00ff00) >> 8;
+ V[i] = (V[i] & 0x0000ffff) << 16 | (V[i] & 0xffff0000) >> 16;
+ }
+ p = (byte *) T;
+#endif
+
+ memset (p, 0, 16);
+
+ for (i = 0; i < 16; i++)
+ {
+ for (j = 0x80; j; j >>= 1)
+ {
+ if (hsub[i] & j)
+ cipher_block_xor (p, p, V, 16);
+ if (bshift (V))
+ V[0] ^= 0xe1000000;
+ }
+ }
+#ifndef WORDS_BIGENDIAN
+ for (i = 0, p = (byte *) T; i < 16; i += 4, p += 4)
+ {
+ result[i + 0] = p[3];
+ result[i + 1] = p[2];
+ result[i + 2] = p[1];
+ result[i + 3] = p[0];
+ }
+#endif
+
+ return (sizeof(V) + sizeof(T) + sizeof(int)*2 + sizeof(void*)*5);
+}
+
+#define fillM(c) do { } while (0)
+#define GHASH(c, result, buf) do_ghash (c->u_mode.gcm.u_ghash_key.key, result, buf)
+#define prefetch_tables(c) do {} while (0)
+
+#endif /* !GCM_USE_TABLES */
+
+
+static unsigned int
+ghash_internal (gcry_cipher_hd_t c, byte *result, const byte *buf,
+ size_t nblocks)
+{
+ const unsigned int blocksize = GCRY_GCM_BLOCK_LEN;
+ unsigned int burn = 0;
+
+ prefetch_tables (c);
+
+ while (nblocks)
+ {
+ burn = GHASH (c, result, buf);
+ buf += blocksize;
+ nblocks--;
+ }
+
+ return burn + (burn ? 5*sizeof(void*) : 0);
+}
+
+
+static void
+setupM (gcry_cipher_hd_t c)
+{
+#if defined(GCM_USE_INTEL_PCLMUL) || defined(GCM_USE_ARM_PMULL) || \
+ defined(GCM_USE_S390X_CRYPTO)
+ unsigned int features = _gcry_get_hw_features ();
+#endif
+
+ c->u_mode.gcm.ghash_fn = NULL;
+
+ if (0)
+ ;
+#ifdef GCM_USE_INTEL_PCLMUL
+ else if (features & HWF_INTEL_PCLMUL)
+ {
+ c->u_mode.gcm.ghash_fn = _gcry_ghash_intel_pclmul;
+ _gcry_ghash_setup_intel_pclmul (c);
+ }
+#endif
+#ifdef GCM_USE_ARM_PMULL
+ else if (features & HWF_ARM_PMULL)
+ {
+ c->u_mode.gcm.ghash_fn = ghash_armv8_ce_pmull;
+ ghash_setup_armv8_ce_pmull (c);
+ }
+#endif
+#ifdef GCM_USE_ARM_NEON
+ else if (features & HWF_ARM_NEON)
+ {
+ c->u_mode.gcm.ghash_fn = ghash_armv7_neon;
+ ghash_setup_armv7_neon (c);
+ }
+#endif
+#ifdef GCM_USE_S390X_CRYPTO
+ else if (features & HWF_S390X_MSA)
+ {
+ if (kimd_query () & km_function_to_mask (KMID_FUNCTION_GHASH))
+ {
+ c->u_mode.gcm.ghash_fn = ghash_s390x_kimd;
+ }
+ }
+#endif
+
+ if (c->u_mode.gcm.ghash_fn == NULL)
+ {
+ c->u_mode.gcm.ghash_fn = ghash_internal;
+ fillM (c);
+ }
+}
+
+
+static inline void
+gcm_bytecounter_add (u32 ctr[2], size_t add)
+{
+ if (sizeof(add) > sizeof(u32))
+ {
+ u32 high_add = ((add >> 31) >> 1) & 0xffffffff;
+ ctr[1] += high_add;
+ }
+
+ ctr[0] += add;
+ if (ctr[0] >= add)
+ return;
+ ++ctr[1];
+}
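
The split-word update above is equivalent to a single 64-bit addition; the `((add >> 31) >> 1)` form merely avoids an out-of-range shift when size_t is only 32 bits wide. A sketch of the plain 64-bit formulation follows, for reference only; bytecounter_add_u64 is an illustrative name, not a libgcrypt function.

#include <stdint.h>

/* Illustrative only: same byte-counter update done with one 64-bit add. */
static inline void bytecounter_add_u64 (uint32_t ctr[2], uint64_t add)
{
  uint64_t total = ((uint64_t)ctr[1] << 32) | ctr[0];
  total += add;
  ctr[0] = (uint32_t)(total & 0xffffffffU);
  ctr[1] = (uint32_t)(total >> 32);
}
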
+
+
+static inline u32
+gcm_add32_be128 (byte *ctr, unsigned int add)
+{
+ /* 'ctr' must be aligned to four bytes. */
+ const unsigned int blocksize = GCRY_GCM_BLOCK_LEN;
+ u32 *pval = (u32 *)(void *)(ctr + blocksize - sizeof(u32));
+ u32 val;
+
+ val = be_bswap32(*pval) + add;
+ *pval = be_bswap32(val);
+
+ return val; /* return result as host-endian value */
+}
+
+
+static inline int
+gcm_check_datalen (u32 ctr[2])
+{
+ /* len(plaintext) <= 2^39-256 bits == 2^36-32 bytes == 2^32-2 blocks */
+ if (ctr[1] > 0xfU)
+ return 0;
+ if (ctr[1] < 0xfU)
+ return 1;
+
+ if (ctr[0] <= 0xffffffe0U)
+ return 1;
+
+ return 0;
+}
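
The constants checked above encode the GCM plaintext limit. A throwaway sketch verifying the arithmetic (the main() wrapper exists only for illustration): the largest allowed byte count, ctr[1] == 0xf with ctr[0] == 0xffffffe0, equals 2^36 - 32 bytes, i.e. 2^39 - 256 bits or 2^32 - 2 blocks.

#include <stdint.h>
#include <stdio.h>

int main (void)
{
  /* ctr[1] == 0xf and ctr[0] == 0xffffffe0 is the largest allowed count. */
  uint64_t max_bytes = ((uint64_t)0xf << 32) + 0xffffffe0U;

  printf ("%llu bytes == %llu bytes == %llu blocks\n",
          (unsigned long long)max_bytes,
          (unsigned long long)((UINT64_C(1) << 36) - 32),
          (unsigned long long)(max_bytes / 16));
  return 0;
}
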
+
+
+static inline int
+gcm_check_aadlen_or_ivlen (u32 ctr[2])
+{
+ /* len(aad/iv) <= 2^64-1 bits ~= 2^61-1 bytes */
+ if (ctr[1] > 0x1fffffffU)
+ return 0;
+ if (ctr[1] < 0x1fffffffU)
+ return 1;
+
+ if (ctr[0] <= 0xffffffffU)
+ return 1;
+
+ return 0;
+}
+
+
+static void
+do_ghash_buf(gcry_cipher_hd_t c, byte *hash, const byte *buf,
+ size_t buflen, int do_padding)
+{
+ unsigned int blocksize = GCRY_GCM_BLOCK_LEN;
+ unsigned int unused = c->u_mode.gcm.mac_unused;
+ ghash_fn_t ghash_fn = c->u_mode.gcm.ghash_fn;
+ size_t nblocks, n;
+ unsigned int burn = 0;
+
+ if (buflen == 0 && (unused == 0 || !do_padding))
+ return;
+
+ do
+ {
+ if (buflen > 0 && (buflen + unused < blocksize || unused > 0))
+ {
+ n = blocksize - unused;
+ n = n < buflen ? n : buflen;
+
+ buf_cpy (&c->u_mode.gcm.macbuf[unused], buf, n);
+
+ unused += n;
+ buf += n;
+ buflen -= n;
+ }
+ if (!buflen)
+ {
+ if (!do_padding && unused < blocksize)
+ {
+ break;
+ }
+
+ n = blocksize - unused;
+ if (n > 0)
+ {
+ memset (&c->u_mode.gcm.macbuf[unused], 0, n);
+ unused = blocksize;
+ }
+ }
+
+ if (unused > 0)
+ {
+ gcry_assert (unused == blocksize);
+
+ /* Process one block from macbuf. */
+ burn = ghash_fn (c, hash, c->u_mode.gcm.macbuf, 1);
+ unused = 0;
+ }
+
+ nblocks = buflen / blocksize;
+
+ if (nblocks)
+ {
+ burn = ghash_fn (c, hash, buf, nblocks);
+ buf += blocksize * nblocks;
+ buflen -= blocksize * nblocks;
+ }
+ }
+ while (buflen > 0);
+
+ c->u_mode.gcm.mac_unused = unused;
+
+ if (burn)
+ _gcry_burn_stack (burn);
+}
+
+
+static gcry_err_code_t
+gcm_ctr_encrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen)
+{
+ gcry_err_code_t err = 0;
+
+ while (inbuflen)
+ {
+ u32 nblocks_to_overflow;
+ u32 num_ctr_increments;
+ u32 curr_ctr_low;
+ size_t currlen = inbuflen;
+ byte ctr_copy[GCRY_GCM_BLOCK_LEN];
+ int fix_ctr = 0;
+
+ /* GCM CTR increments only the least significant 32 bits of the counter,
+ * without carry into the upper 96 bits. Using the generic CTR
+ * implementation directly would carry a 32-bit overflow into the upper
+ * 96 bits. Detect whether the input is long enough to cause an overflow
+ * and limit the input length so that the CTR overflow happens, but the
+ * updated CTR value is not used to encrypt further input. After the
+ * overflow, the upper 96 bits of the CTR are restored to cancel out the
+ * modification done by the generic CTR encryption. */
+
+ if (inbuflen > c->unused)
+ {
+ curr_ctr_low = gcm_add32_be128 (c->u_ctr.ctr, 0);
+
+ /* Number of CTR increments this inbuflen would cause. */
+ num_ctr_increments = (inbuflen - c->unused) / GCRY_GCM_BLOCK_LEN +
+ !!((inbuflen - c->unused) % GCRY_GCM_BLOCK_LEN);
+
+ if ((u32)(num_ctr_increments + curr_ctr_low) < curr_ctr_low)
+ {
+ nblocks_to_overflow = 0xffffffffU - curr_ctr_low + 1;
+ currlen = nblocks_to_overflow * GCRY_GCM_BLOCK_LEN + c->unused;
+ if (currlen > inbuflen)
+ {
+ currlen = inbuflen;
+ }
+
+ fix_ctr = 1;
+ cipher_block_cpy(ctr_copy, c->u_ctr.ctr, GCRY_GCM_BLOCK_LEN);
+ }
+ }
+
+ err = _gcry_cipher_ctr_encrypt(c, outbuf, outbuflen, inbuf, currlen);
+ if (err != 0)
+ return err;
+
+ if (fix_ctr)
+ {
+ /* Lower 32-bits of CTR should now be zero. */
+ gcry_assert(gcm_add32_be128 (c->u_ctr.ctr, 0) == 0);
+
+ /* Restore upper part of CTR. */
+ buf_cpy(c->u_ctr.ctr, ctr_copy, GCRY_GCM_BLOCK_LEN - sizeof(u32));
+
+ wipememory(ctr_copy, sizeof(ctr_copy));
+ }
+
+ inbuflen -= currlen;
+ inbuf += currlen;
+ outbuflen -= currlen;
+ outbuf += currlen;
+ }
+
+ return err;
+}
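
As a concrete, hypothetical example of the wrap handling above: if the low 32 bits of the counter are currently 0xfffffffe and there is no cached keystream (c->unused == 0), then nblocks_to_overflow = 0xffffffff - 0xfffffffe + 1 = 2, so currlen = 2 * GCRY_GCM_BLOCK_LEN = 32 bytes are handed to the generic CTR code; the counter then wraps to zero and the saved upper 96 bits are copied back from ctr_copy before the remaining input is processed.
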
+
+
+static gcry_err_code_t
+gcm_crypt_inner (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen, int encrypt)
+{
+ gcry_err_code_t err;
+
+ while (inbuflen)
+ {
+ size_t currlen = inbuflen;
+
+ /* Use a bulk method if available. */
+ if (c->bulk.gcm_crypt)
+ {
+ /* Bulk method requires that there is no cached data. */
+ if (inbuflen >= GCRY_GCM_BLOCK_LEN && c->u_mode.gcm.mac_unused == 0)
+ {
+ size_t nblks = inbuflen / GCRY_GCM_BLOCK_LEN;
+ size_t nleft;
+ size_t ndone;
+
+ nleft = c->bulk.gcm_crypt (c, outbuf, inbuf, nblks, encrypt);
+ ndone = nblks - nleft;
+
+ inbuf += ndone * GCRY_GCM_BLOCK_LEN;
+ outbuf += ndone * GCRY_GCM_BLOCK_LEN;
+ inbuflen -= ndone * GCRY_GCM_BLOCK_LEN;
+ outbuflen -= ndone * GCRY_GCM_BLOCK_LEN;
+
+ if (inbuflen == 0)
+ break;
+
+ currlen = inbuflen;
+ }
+ else if (c->u_mode.gcm.mac_unused > 0
+ && inbuflen >= GCRY_GCM_BLOCK_LEN
+ + (16 - c->u_mode.gcm.mac_unused))
+ {
+ /* Handle just enough data so that cache is depleted, and on
+ * next loop iteration use bulk method. */
+ currlen = 16 - c->u_mode.gcm.mac_unused;
+
+ gcry_assert(currlen);
+ }
+ }
+
+ /* Since checksumming is done after/before encryption/decryption,
+ * process input in 24KiB chunks to keep data loaded in L1 cache for
+ * checksumming/decryption. */
+ if (currlen > 24 * 1024)
+ currlen = 24 * 1024;
+
+ if (!encrypt)
+ do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, inbuf, currlen, 0);
+
+ err = gcm_ctr_encrypt(c, outbuf, outbuflen, inbuf, currlen);
+ if (err != 0)
+ return err;
+
+ if (encrypt)
+ do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, outbuf, currlen, 0);
+
+ outbuf += currlen;
+ inbuf += currlen;
+ outbuflen -= currlen;
+ inbuflen -= currlen;
+ }
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_encrypt (gcry_cipher_hd_t c,
+ byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen)
+{
+ static const unsigned char zerobuf[MAX_BLOCKSIZE];
+
+ if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN)
+ return GPG_ERR_CIPHER_ALGO;
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ if (c->u_mode.gcm.datalen_over_limits)
+ return GPG_ERR_INV_LENGTH;
+ if (c->marks.tag
+ || c->u_mode.gcm.ghash_data_finalized
+ || !c->u_mode.gcm.ghash_fn)
+ return GPG_ERR_INV_STATE;
+
+ if (!c->marks.iv)
+ _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN);
+
+ if (c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode)
+ return GPG_ERR_INV_STATE;
+
+ if (!c->u_mode.gcm.ghash_aad_finalized)
+ {
+ /* Start of encryption marks end of AAD stream. */
+ do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1);
+ c->u_mode.gcm.ghash_aad_finalized = 1;
+ }
+
+ gcm_bytecounter_add(c->u_mode.gcm.datalen, inbuflen);
+ if (!gcm_check_datalen(c->u_mode.gcm.datalen))
+ {
+ c->u_mode.gcm.datalen_over_limits = 1;
+ return GPG_ERR_INV_LENGTH;
+ }
+
+ return gcm_crypt_inner (c, outbuf, outbuflen, inbuf, inbuflen, 1);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_decrypt (gcry_cipher_hd_t c,
+ byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen)
+{
+ static const unsigned char zerobuf[MAX_BLOCKSIZE];
+
+ if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN)
+ return GPG_ERR_CIPHER_ALGO;
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ if (c->u_mode.gcm.datalen_over_limits)
+ return GPG_ERR_INV_LENGTH;
+ if (c->marks.tag
+ || c->u_mode.gcm.ghash_data_finalized
+ || !c->u_mode.gcm.ghash_fn)
+ return GPG_ERR_INV_STATE;
+
+ if (!c->marks.iv)
+ _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN);
+
+ if (!c->u_mode.gcm.ghash_aad_finalized)
+ {
+ /* Start of decryption marks end of AAD stream. */
+ do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1);
+ c->u_mode.gcm.ghash_aad_finalized = 1;
+ }
+
+ gcm_bytecounter_add(c->u_mode.gcm.datalen, inbuflen);
+ if (!gcm_check_datalen(c->u_mode.gcm.datalen))
+ {
+ c->u_mode.gcm.datalen_over_limits = 1;
+ return GPG_ERR_INV_LENGTH;
+ }
+
+ return gcm_crypt_inner (c, outbuf, outbuflen, inbuf, inbuflen, 0);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_authenticate (gcry_cipher_hd_t c,
+ const byte * aadbuf, size_t aadbuflen)
+{
+ static const unsigned char zerobuf[MAX_BLOCKSIZE];
+
+ if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN)
+ return GPG_ERR_CIPHER_ALGO;
+ if (c->u_mode.gcm.datalen_over_limits)
+ return GPG_ERR_INV_LENGTH;
+ if (c->marks.tag
+ || c->u_mode.gcm.ghash_aad_finalized
+ || c->u_mode.gcm.ghash_data_finalized
+ || !c->u_mode.gcm.ghash_fn)
+ return GPG_ERR_INV_STATE;
+
+ if (!c->marks.iv)
+ _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN);
+
+ gcm_bytecounter_add(c->u_mode.gcm.aadlen, aadbuflen);
+ if (!gcm_check_aadlen_or_ivlen(c->u_mode.gcm.aadlen))
+ {
+ c->u_mode.gcm.datalen_over_limits = 1;
+ return GPG_ERR_INV_LENGTH;
+ }
+
+ do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, aadbuf, aadbuflen, 0);
+
+ return 0;
+}
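+
+/* The three entry points above are normally reached through libgcrypt's
+ * public API: gcry_cipher_open with GCRY_CIPHER_MODE_GCM, followed by
+ * the generic setkey/setiv/authenticate/encrypt/gettag calls, which
+ * dispatch here through the mode_ops table.  A minimal caller-side
+ * sketch with placeholder key, IV and buffers; error handling omitted.
+ * It is illustrative only and therefore kept out of the build.  */
+#if 0 /* usage sketch */
+static void
+gcm_usage_sketch (void)
+{
+  gcry_cipher_hd_t hd;
+  unsigned char key[16] = { 0 };   /* placeholder key */
+  unsigned char iv[12] = { 0 };    /* placeholder 96-bit IV */
+  unsigned char aad[4] = "aad";
+  unsigned char buf[32] = { 0 };   /* plaintext, encrypted in place */
+  unsigned char tag[16];
+
+  gcry_cipher_open (&hd, GCRY_CIPHER_AES128, GCRY_CIPHER_MODE_GCM, 0);
+  gcry_cipher_setkey (hd, key, sizeof key);
+  gcry_cipher_setiv (hd, iv, sizeof iv);
+  gcry_cipher_authenticate (hd, aad, sizeof aad);
+  gcry_cipher_encrypt (hd, buf, sizeof buf, NULL, 0);
+  gcry_cipher_gettag (hd, tag, sizeof tag);
+  gcry_cipher_close (hd);
+}
+#endif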
+
+
+void
+_gcry_cipher_gcm_setkey (gcry_cipher_hd_t c)
+{
+ memset (c->u_mode.gcm.u_ghash_key.key, 0, GCRY_GCM_BLOCK_LEN);
+
+ c->spec->encrypt (&c->context.c, c->u_mode.gcm.u_ghash_key.key,
+ c->u_mode.gcm.u_ghash_key.key);
+ setupM (c);
+}
+
+
+static gcry_err_code_t
+_gcry_cipher_gcm_initiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen)
+{
+ memset (c->u_mode.gcm.aadlen, 0, sizeof(c->u_mode.gcm.aadlen));
+ memset (c->u_mode.gcm.datalen, 0, sizeof(c->u_mode.gcm.datalen));
+ memset (c->u_mode.gcm.u_tag.tag, 0, GCRY_GCM_BLOCK_LEN);
+ c->u_mode.gcm.datalen_over_limits = 0;
+ c->u_mode.gcm.ghash_data_finalized = 0;
+ c->u_mode.gcm.ghash_aad_finalized = 0;
+
+ if (ivlen == 0)
+ return GPG_ERR_INV_LENGTH;
+
+ if (ivlen != GCRY_GCM_BLOCK_LEN - 4)
+ {
+ u32 iv_bytes[2] = {0, 0};
+ u32 bitlengths[2][2];
+
+ if (!c->u_mode.gcm.ghash_fn)
+ return GPG_ERR_INV_STATE;
+
+ memset(c->u_ctr.ctr, 0, GCRY_GCM_BLOCK_LEN);
+
+ gcm_bytecounter_add(iv_bytes, ivlen);
+ if (!gcm_check_aadlen_or_ivlen(iv_bytes))
+ {
+ c->u_mode.gcm.datalen_over_limits = 1;
+ return GPG_ERR_INV_LENGTH;
+ }
+
+ do_ghash_buf(c, c->u_ctr.ctr, iv, ivlen, 1);
+
+ /* iv length, 64-bit */
+ bitlengths[1][1] = be_bswap32(iv_bytes[0] << 3);
+ bitlengths[1][0] = be_bswap32((iv_bytes[0] >> 29) |
+ (iv_bytes[1] << 3));
+ /* zeros, 64-bit */
+ bitlengths[0][1] = 0;
+ bitlengths[0][0] = 0;
+
+ do_ghash_buf(c, c->u_ctr.ctr, (byte*)bitlengths, GCRY_GCM_BLOCK_LEN, 1);
+
+ wipememory (iv_bytes, sizeof iv_bytes);
+ wipememory (bitlengths, sizeof bitlengths);
+ }
+ else
+ {
+ /* 96-bit IV is handled differently. */
+ memcpy (c->u_ctr.ctr, iv, ivlen);
+ c->u_ctr.ctr[12] = c->u_ctr.ctr[13] = c->u_ctr.ctr[14] = 0;
+ c->u_ctr.ctr[15] = 1;
+ }
+
+ c->spec->encrypt (&c->context.c, c->u_mode.gcm.tagiv, c->u_ctr.ctr);
+
+ gcm_add32_be128 (c->u_ctr.ctr, 1);
+
+ c->unused = 0;
+ c->marks.iv = 1;
+ c->marks.tag = 0;
+
+ return 0;
+}
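+
+/* The two IV cases above follow NIST SP 800-38D: for the special 96-bit
+ * (12 byte) IV the initial counter block J0 is simply IV || 0^31 || 1,
+ * while any other IV length is run through GHASH together with its bit
+ * length.  A minimal sketch of the 96-bit layout, mirroring the
+ * assignments above; the helper name is hypothetical and the code is
+ * kept out of the build.  */
+#if 0 /* illustrative sketch */
+static void
+gcm_j0_from_96bit_iv (unsigned char j0[16], const unsigned char iv[12])
+{
+  memcpy (j0, iv, 12);           /* leftmost 96 bits are the IV       */
+  j0[12] = j0[13] = j0[14] = 0;  /* 31 zero bits ...                  */
+  j0[15] = 1;                    /* ... followed by a final 1 bit     */
+}
+#endif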
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen)
+{
+ c->marks.iv = 0;
+ c->marks.tag = 0;
+ c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 0;
+
+ if (fips_mode ())
+ {
+ /* Direct invocation of GCM setiv in FIPS mode disables encryption. */
+ c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 1;
+ }
+
+ return _gcry_cipher_gcm_initiv (c, iv, ivlen);
+}
+
+
+#if 0 && TODO
+void
+_gcry_cipher_gcm_geniv (gcry_cipher_hd_t c,
+ byte *ivout, size_t ivoutlen, const byte *nonce,
+ size_t noncelen)
+{
+ /* nonce: user provided part (might be null) */
+ /* noncelen: check if proper length (if nonce not null) */
+ /* ivout: iv used to initialize gcm, output to user */
+ /* ivoutlen: check correct size */
+ byte iv[IVLEN];
+
+ if (!ivout)
+ return GPG_ERR_INV_ARG;
+ if (ivoutlen != IVLEN)
+ return GPG_ERR_INV_LENGTH;
+ if (nonce != NULL && !is_nonce_ok_len(noncelen))
+ return GPG_ERR_INV_ARG;
+
+ gcm_generate_iv(iv, nonce, noncelen);
+
+ c->marks.iv = 0;
+ c->marks.tag = 0;
+ c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 0;
+
+ _gcry_cipher_gcm_initiv (c, iv, IVLEN);
+
+ buf_cpy(ivout, iv, IVLEN);
+ wipememory(iv, sizeof(iv));
+}
+#endif
+
+
+static int
+is_tag_length_valid(size_t taglen)
+{
+ switch (taglen)
+ {
+ /* Allowed tag lengths from NIST SP 800-38D. */
+ case 128 / 8: /* GCRY_GCM_BLOCK_LEN */
+ case 120 / 8:
+ case 112 / 8:
+ case 104 / 8:
+ case 96 / 8:
+ case 64 / 8:
+ case 32 / 8:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+static gcry_err_code_t
+_gcry_cipher_gcm_tag (gcry_cipher_hd_t c,
+ byte * outbuf, size_t outbuflen, int check)
+{
+ if (!(is_tag_length_valid (outbuflen) || outbuflen >= GCRY_GCM_BLOCK_LEN))
+ return GPG_ERR_INV_LENGTH;
+ if (c->u_mode.gcm.datalen_over_limits)
+ return GPG_ERR_INV_LENGTH;
+
+ if (!c->marks.tag)
+ {
+ u32 bitlengths[2][2];
+
+ if (!c->u_mode.gcm.ghash_fn)
+ return GPG_ERR_INV_STATE;
+
+ /* aad length */
+ bitlengths[0][1] = be_bswap32(c->u_mode.gcm.aadlen[0] << 3);
+ bitlengths[0][0] = be_bswap32((c->u_mode.gcm.aadlen[0] >> 29) |
+ (c->u_mode.gcm.aadlen[1] << 3));
+ /* data length */
+ bitlengths[1][1] = be_bswap32(c->u_mode.gcm.datalen[0] << 3);
+ bitlengths[1][0] = be_bswap32((c->u_mode.gcm.datalen[0] >> 29) |
+ (c->u_mode.gcm.datalen[1] << 3));
+
+ /* Finalize data-stream. */
+ do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1);
+ c->u_mode.gcm.ghash_aad_finalized = 1;
+ c->u_mode.gcm.ghash_data_finalized = 1;
+
+ /* Add bitlengths to tag. */
+ do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, (byte*)bitlengths,
+ GCRY_GCM_BLOCK_LEN, 1);
+ cipher_block_xor (c->u_mode.gcm.u_tag.tag, c->u_mode.gcm.tagiv,
+ c->u_mode.gcm.u_tag.tag, GCRY_GCM_BLOCK_LEN);
+ c->marks.tag = 1;
+
+ wipememory (bitlengths, sizeof (bitlengths));
+ wipememory (c->u_mode.gcm.macbuf, GCRY_GCM_BLOCK_LEN);
+ wipememory (c->u_mode.gcm.tagiv, GCRY_GCM_BLOCK_LEN);
+ wipememory (c->u_mode.gcm.aadlen, sizeof (c->u_mode.gcm.aadlen));
+ wipememory (c->u_mode.gcm.datalen, sizeof (c->u_mode.gcm.datalen));
+ }
+
+ if (!check)
+ {
+ if (outbuflen > GCRY_GCM_BLOCK_LEN)
+ outbuflen = GCRY_GCM_BLOCK_LEN;
+
+ /* NB: We already checked that OUTBUF is large enough to hold
+ * the result or has valid truncated length. */
+ memcpy (outbuf, c->u_mode.gcm.u_tag.tag, outbuflen);
+ }
+ else
+ {
+ /* OUTBUFLEN gives the length of the user supplied tag in OUTBUF
+ * and thus we need to compare its length first. */
+ if (!is_tag_length_valid (outbuflen)
+ || !buf_eq_const (outbuf, c->u_mode.gcm.u_tag.tag, outbuflen))
+ return GPG_ERR_CHECKSUM;
+ }
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_get_tag (gcry_cipher_hd_t c, unsigned char *outtag,
+ size_t taglen)
+{
+  /* Outputting the authentication tag is part of encryption. */
+ if (c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode)
+ return GPG_ERR_INV_STATE;
+
+ return _gcry_cipher_gcm_tag (c, outtag, taglen, 0);
+}
+
+gcry_err_code_t
+_gcry_cipher_gcm_check_tag (gcry_cipher_hd_t c, const unsigned char *intag,
+ size_t taglen)
+{
+ return _gcry_cipher_gcm_tag (c, (unsigned char *) intag, taglen, 1);
+}
diff --git a/comm/third_party/libgcrypt/cipher/cipher-internal.h b/comm/third_party/libgcrypt/cipher/cipher-internal.h
new file mode 100644
index 0000000000..59b36ce78b
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-internal.h
@@ -0,0 +1,809 @@
+/* cipher-internal.h - Internal defs for cipher.c
+ * Copyright (C) 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef G10_CIPHER_INTERNAL_H
+#define G10_CIPHER_INTERNAL_H
+
+#include "./poly1305-internal.h"
+
+
+/* The maximum supported size of a block in bytes. */
+#define MAX_BLOCKSIZE 16
+
+/* The length for an OCB block. Although OCB supports any block
+ length it does not make sense to use a 64 bit blocklen (and cipher)
+ because this reduces the security margin to an unacceptable state.
+ Thus we require a cipher with 128 bit blocklength. */
+#define OCB_BLOCK_LEN (128/8)
+
+/* The size of the pre-computed L table for OCB. This takes the same
+ size as the table used for GCM and thus we don't save anything by
+ not using such a table. */
+#define OCB_L_TABLE_SIZE 16
+
+
+/* Check the above constants. */
+#if OCB_BLOCK_LEN > MAX_BLOCKSIZE
+# error OCB_BLOCK_LEN > MAX_BLOCKSIZE
+#endif
+
+
+
+/* Magic values for the context structure. */
+#define CTX_MAGIC_NORMAL 0x24091964
+#define CTX_MAGIC_SECURE 0x46919042
+
+/* Try to use 16 byte aligned cipher context for better performance.
+ We use the aligned attribute, thus it is only possible to implement
+ this with gcc. */
+#undef NEED_16BYTE_ALIGNED_CONTEXT
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define NEED_16BYTE_ALIGNED_CONTEXT 1
+#endif
+
+/* Undef this symbol to trade GCM speed for 256 bytes of memory per context */
+#define GCM_USE_TABLES 1
+
+
+/* GCM_USE_INTEL_PCLMUL indicates whether to compile GCM with Intel PCLMUL
+ code. */
+#undef GCM_USE_INTEL_PCLMUL
+#if defined(ENABLE_PCLMUL_SUPPORT) && defined(GCM_USE_TABLES)
+# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+# if __GNUC__ >= 4
+# define GCM_USE_INTEL_PCLMUL 1
+# endif
+# endif
+#endif /* GCM_USE_INTEL_PCLMUL */
+
+/* GCM_USE_ARM_PMULL indicates whether to compile GCM with ARMv8 PMULL code. */
+#undef GCM_USE_ARM_PMULL
+#if defined(ENABLE_ARM_CRYPTO_SUPPORT) && defined(GCM_USE_TABLES)
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+# define GCM_USE_ARM_PMULL 1
+# elif defined(__AARCH64EL__) && \
+ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+# define GCM_USE_ARM_PMULL 1
+# endif
+#endif /* GCM_USE_ARM_PMULL */
+
+/* GCM_USE_ARM_NEON indicates whether to compile GCM with ARMv7 NEON code. */
+#undef GCM_USE_ARM_NEON
+#if defined(GCM_USE_TABLES)
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_NEON)
+# define GCM_USE_ARM_NEON 1
+#endif
+#endif /* GCM_USE_ARM_NEON */
+
+/* GCM_USE_S390X_CRYPTO indicates whether to enable zSeries code. */
+#undef GCM_USE_S390X_CRYPTO
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define GCM_USE_S390X_CRYPTO 1
+#endif /* GCM_USE_S390X_CRYPTO */
+
+typedef unsigned int (*ghash_fn_t) (gcry_cipher_hd_t c, byte *result,
+ const byte *buf, size_t nblocks);
+
+
+/* A structure with function pointers for mode operations. */
+typedef struct cipher_mode_ops
+{
+ gcry_err_code_t (*encrypt)(gcry_cipher_hd_t c, unsigned char *outbuf,
+ size_t outbuflen, const unsigned char *inbuf,
+ size_t inbuflen);
+ gcry_err_code_t (*decrypt)(gcry_cipher_hd_t c, unsigned char *outbuf,
+ size_t outbuflen, const unsigned char *inbuf,
+ size_t inbuflen);
+ gcry_err_code_t (*setiv)(gcry_cipher_hd_t c, const unsigned char *iv,
+ size_t ivlen);
+
+ gcry_err_code_t (*authenticate)(gcry_cipher_hd_t c,
+ const unsigned char *abuf, size_t abuflen);
+ gcry_err_code_t (*get_tag)(gcry_cipher_hd_t c, unsigned char *outtag,
+ size_t taglen);
+ gcry_err_code_t (*check_tag)(gcry_cipher_hd_t c, const unsigned char *intag,
+ size_t taglen);
+} cipher_mode_ops_t;
+
+
+/* A structure with function pointers for bulk operations. The cipher
+ algorithm setkey function initializes them when bulk operations are
+ available and the actual encryption routines use them if they are
+ not NULL. */
+typedef struct cipher_bulk_ops
+{
+ void (*cfb_enc)(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks);
+ void (*cfb_dec)(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks);
+ void (*cbc_enc)(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int cbc_mac);
+ void (*cbc_dec)(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks);
+ void (*ofb_enc)(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks);
+ void (*ctr_enc)(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks);
+ size_t (*ocb_crypt)(gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt);
+ size_t (*ocb_auth)(gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks);
+ void (*xts_crypt)(void *context, unsigned char *tweak, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt);
+ size_t (*gcm_crypt)(gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt);
+} cipher_bulk_ops_t;
+
+
+/* A VIA processor with the Padlock engine as well as the Intel AES_NI
+ instructions require an alignment of most data on a 16 byte
+ boundary. Because we trick out the compiler while allocating the
+ context, the align attribute as used in rijndael.c does not work on
+ its own. Thus we need to make sure that the entire context
+   structure is aligned on that boundary.  We achieve this by
+   defining a new type and using that instead of our usual alignment
+ type. */
+typedef union
+{
+ PROPERLY_ALIGNED_TYPE foo;
+#ifdef NEED_16BYTE_ALIGNED_CONTEXT
+ char bar[16] __attribute__ ((aligned (16)));
+#endif
+ char c[1];
+} cipher_context_alignment_t;
+
+
+/* Storage structure for CMAC, for CMAC and EAX modes. */
+typedef struct {
+ /* The initialization vector. Also contains tag after finalization. */
+ union {
+ cipher_context_alignment_t iv_align;
+ unsigned char iv[MAX_BLOCKSIZE];
+ } u_iv;
+
+ /* Subkeys for tag creation, not cleared by gcry_cipher_reset. */
+ unsigned char subkeys[2][MAX_BLOCKSIZE];
+
+ /* Space to save partial input lengths for MAC. */
+ unsigned char macbuf[MAX_BLOCKSIZE];
+
+ int mac_unused; /* Number of unprocessed bytes in MACBUF. */
+ unsigned int tag:1; /* Set to 1 if tag has been finalized. */
+} gcry_cmac_context_t;
+
+
+/* The handle structure. */
+struct gcry_cipher_handle
+{
+ int magic;
+ size_t actual_handle_size; /* Allocated size of this handle. */
+ size_t handle_offset; /* Offset to the malloced block. */
+ gcry_cipher_spec_t *spec;
+
+ /* The algorithm id. This is a hack required because the module
+     interface does not easily allow retrieving this value. */
+ int algo;
+
+ /* A structure with function pointers for mode operations. */
+ cipher_mode_ops_t mode_ops;
+
+ /* A structure with function pointers for bulk operations. Due to
+ limitations of the module system (we don't want to change the
+ API) we need to keep these function pointers here. */
+ cipher_bulk_ops_t bulk;
+
+ int mode;
+ unsigned int flags;
+
+ struct {
+ unsigned int key:1; /* Set to 1 if a key has been set. */
+    unsigned int iv:1;  /* Set to 1 if an IV has been set.  */
+ unsigned int tag:1; /* Set to 1 if a tag is finalized. */
+ unsigned int finalize:1; /* Next encrypt/decrypt has the final data. */
+ unsigned int allow_weak_key:1; /* Set to 1 if weak keys are allowed. */
+ } marks;
+
+ /* The initialization vector. For best performance we make sure
+ that it is properly aligned. In particular some implementations
+     of bulk operations expect a 16 byte aligned IV. IV is also used
+ to store CBC-MAC in CCM mode; counter IV is stored in U_CTR. For
+ OCB mode it is used for the offset value. */
+ union {
+ cipher_context_alignment_t iv_align;
+ unsigned char iv[MAX_BLOCKSIZE];
+ } u_iv;
+
+ /* The counter for CTR mode. This field is also used by AESWRAP and
+ thus we can't use the U_IV union. For OCB mode it is used for
+ the checksum. */
+ union {
+ cipher_context_alignment_t iv_align;
+ unsigned char ctr[MAX_BLOCKSIZE];
+ } u_ctr;
+
+ /* Space to save an IV or CTR for chaining operations. */
+ unsigned char lastiv[MAX_BLOCKSIZE];
+ int unused; /* Number of unused bytes in LASTIV. */
+
+ union {
+ /* Mode specific storage for CCM mode. */
+ struct {
+ u64 encryptlen;
+ u64 aadlen;
+ unsigned int authlen;
+
+ /* Space to save partial input lengths for MAC. */
+ unsigned char macbuf[GCRY_CCM_BLOCK_LEN];
+ int mac_unused; /* Number of unprocessed bytes in MACBUF. */
+
+ unsigned char s0[GCRY_CCM_BLOCK_LEN];
+
+ unsigned int nonce:1; /* Set to 1 if nonce has been set. */
+      unsigned int lengths:1; /* Set to 1 if CCM length parameters have been
+ processed. */
+ } ccm;
+
+ /* Mode specific storage for Poly1305 mode. */
+ struct {
+ /* byte counter for AAD. */
+ u32 aadcount[2];
+
+ /* byte counter for data. */
+ u32 datacount[2];
+
+ unsigned int aad_finalized:1;
+ unsigned int bytecount_over_limits:1;
+
+ poly1305_context_t ctx;
+ } poly1305;
+
+ /* Mode specific storage for CMAC mode. */
+ gcry_cmac_context_t cmac;
+
+ /* Mode specific storage for EAX mode. */
+ struct {
+ /* CMAC for header (AAD). */
+ gcry_cmac_context_t cmac_header;
+
+ /* CMAC for ciphertext. */
+ gcry_cmac_context_t cmac_ciphertext;
+ } eax;
+
+ /* Mode specific storage for GCM mode. */
+ struct {
+ /* The interim tag for GCM mode. */
+ union {
+ cipher_context_alignment_t iv_align;
+ unsigned char tag[MAX_BLOCKSIZE];
+ } u_tag;
+
+ /* Space to save partial input lengths for MAC. */
+ unsigned char macbuf[GCRY_CCM_BLOCK_LEN];
+ int mac_unused; /* Number of unprocessed bytes in MACBUF. */
+
+ /* byte counters for GCM */
+ u32 aadlen[2];
+ u32 datalen[2];
+
+ /* encrypted tag counter */
+ unsigned char tagiv[MAX_BLOCKSIZE];
+
+ unsigned int ghash_data_finalized:1;
+ unsigned int ghash_aad_finalized:1;
+
+ unsigned int datalen_over_limits:1;
+ unsigned int disallow_encryption_because_of_setiv_in_fips_mode:1;
+
+ /* --- Following members are not cleared in gcry_cipher_reset --- */
+
+ /* GHASH multiplier from key. */
+ union {
+ cipher_context_alignment_t iv_align;
+ unsigned char key[MAX_BLOCKSIZE];
+ } u_ghash_key;
+
+ /* GHASH implementation in use. */
+ ghash_fn_t ghash_fn;
+
+ /* Pre-calculated table for GCM. */
+#ifdef GCM_USE_TABLES
+ #if (SIZEOF_UNSIGNED_LONG == 8 || defined(__x86_64__))
+ #define GCM_TABLES_USE_U64 1
+ u64 gcm_table[4 * 16];
+ #else
+ #undef GCM_TABLES_USE_U64
+ u32 gcm_table[8 * 16];
+ #endif
+#endif
+ } gcm;
+
+ /* Mode specific storage for OCB mode. */
+ struct {
+ /* --- Following members are not cleared in gcry_cipher_reset --- */
+
+ /* Helper variables and pre-computed table of L values. */
+ unsigned char L_star[OCB_BLOCK_LEN];
+ unsigned char L_dollar[OCB_BLOCK_LEN];
+ unsigned char L0L1[OCB_BLOCK_LEN];
+ unsigned char L[OCB_L_TABLE_SIZE][OCB_BLOCK_LEN];
+
+ /* --- Following members are cleared in gcry_cipher_reset --- */
+
+ /* The tag is valid if marks.tag has been set. */
+ unsigned char tag[OCB_BLOCK_LEN];
+
+ /* A buffer to hold the offset for the AAD processing. */
+ unsigned char aad_offset[OCB_BLOCK_LEN];
+
+ /* A buffer to hold the current sum of AAD processing. We can't
+ use tag here because tag may already hold the preprocessed
+ checksum of the data. */
+ unsigned char aad_sum[OCB_BLOCK_LEN];
+
+ /* A buffer to store AAD data not yet processed. */
+ unsigned char aad_leftover[OCB_BLOCK_LEN];
+
+ /* Number of data/aad blocks processed so far. */
+ u64 data_nblocks;
+ u64 aad_nblocks;
+
+ /* Number of valid bytes in AAD_LEFTOVER. */
+ unsigned char aad_nleftover;
+
+ /* Length of the tag. Fixed for now but may eventually be
+ specified using a set of gcry_cipher_flags. */
+ unsigned char taglen;
+
+ /* Flags indicating that the final data/aad block has been
+ processed. */
+ unsigned int data_finalized:1;
+ unsigned int aad_finalized:1;
+ } ocb;
+
+ /* Mode specific storage for XTS mode. */
+ struct {
+ /* Pointer to tweak cipher context, allocated after actual
+ * cipher context. */
+ char *tweak_context;
+ } xts;
+ } u_mode;
+
+ /* What follows are two contexts of the cipher in use. The first
+ one needs to be aligned well enough for the cipher operation
+ whereas the second one is a copy created by cipher_setkey and
+ used by cipher_reset. That second copy has no need for proper
+     alignment because it is only accessed by memcpy. */
+ cipher_context_alignment_t context;
+};
+
+
+/*-- cipher-cbc.c --*/
+gcry_err_code_t _gcry_cipher_cbc_encrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cbc_decrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cbc_cts_encrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cbc_cts_decrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+
+/*-- cipher-cfb.c --*/
+gcry_err_code_t _gcry_cipher_cfb_encrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cfb_decrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cfb8_encrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cfb8_decrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+
+
+/*-- cipher-ofb.c --*/
+gcry_err_code_t _gcry_cipher_ofb_encrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+
+/*-- cipher-ctr.c --*/
+gcry_err_code_t _gcry_cipher_ctr_encrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+
+
+/*-- cipher-aeswrap.c --*/
+gcry_err_code_t _gcry_cipher_aeswrap_encrypt
+/* */ (gcry_cipher_hd_t c,
+ byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_aeswrap_decrypt
+/* */ (gcry_cipher_hd_t c,
+ byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen);
+
+
+/*-- cipher-ccm.c --*/
+gcry_err_code_t _gcry_cipher_ccm_encrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_ccm_decrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_ccm_set_nonce
+/* */ (gcry_cipher_hd_t c, const unsigned char *nonce,
+ size_t noncelen);
+gcry_err_code_t _gcry_cipher_ccm_authenticate
+/* */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t abuflen);
+gcry_err_code_t _gcry_cipher_ccm_set_lengths
+/* */ (gcry_cipher_hd_t c, u64 encryptedlen, u64 aadlen, u64 taglen);
+gcry_err_code_t _gcry_cipher_ccm_get_tag
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_ccm_check_tag
+/* */ (gcry_cipher_hd_t c,
+ const unsigned char *intag, size_t taglen);
+
+
+/*-- cipher-cmac.c --*/
+gcry_err_code_t _gcry_cmac_generate_subkeys
+/* */ (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx);
+gcry_err_code_t _gcry_cmac_write
+/* */ (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx,
+ const byte * inbuf, size_t inlen);
+gcry_err_code_t _gcry_cmac_final
+/* */ (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx);
+void _gcry_cmac_reset (gcry_cmac_context_t *ctx);
+
+
+/*-- cipher-eax.c --*/
+gcry_err_code_t _gcry_cipher_eax_encrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_eax_decrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_eax_set_nonce
+/* */ (gcry_cipher_hd_t c,
+ const unsigned char *nonce, size_t noncelen);
+gcry_err_code_t _gcry_cipher_eax_authenticate
+/* */ (gcry_cipher_hd_t c,
+ const unsigned char *aadbuf, size_t aadbuflen);
+gcry_err_code_t _gcry_cipher_eax_get_tag
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_eax_check_tag
+/* */ (gcry_cipher_hd_t c,
+ const unsigned char *intag, size_t taglen);
+gcry_err_code_t _gcry_cipher_eax_setkey
+/* */ (gcry_cipher_hd_t c);
+
+
+/*-- cipher-gcm.c --*/
+gcry_err_code_t _gcry_cipher_gcm_encrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_gcm_decrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_gcm_setiv
+/* */ (gcry_cipher_hd_t c,
+ const unsigned char *iv, size_t ivlen);
+gcry_err_code_t _gcry_cipher_gcm_authenticate
+/* */ (gcry_cipher_hd_t c,
+ const unsigned char *aadbuf, size_t aadbuflen);
+gcry_err_code_t _gcry_cipher_gcm_get_tag
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_gcm_check_tag
+/* */ (gcry_cipher_hd_t c,
+ const unsigned char *intag, size_t taglen);
+void _gcry_cipher_gcm_setkey
+/* */ (gcry_cipher_hd_t c);
+
+
+/*-- cipher-poly1305.c --*/
+gcry_err_code_t _gcry_cipher_poly1305_encrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_poly1305_decrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_poly1305_setiv
+/* */ (gcry_cipher_hd_t c,
+ const unsigned char *iv, size_t ivlen);
+gcry_err_code_t _gcry_cipher_poly1305_authenticate
+/* */ (gcry_cipher_hd_t c,
+ const unsigned char *aadbuf, size_t aadbuflen);
+gcry_err_code_t _gcry_cipher_poly1305_get_tag
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_poly1305_check_tag
+/* */ (gcry_cipher_hd_t c,
+ const unsigned char *intag, size_t taglen);
+void _gcry_cipher_poly1305_setkey
+/* */ (gcry_cipher_hd_t c);
+
+
+/*-- chacha20.c --*/
+gcry_err_code_t _gcry_chacha20_poly1305_encrypt
+/* */ (gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf,
+ size_t length);
+gcry_err_code_t _gcry_chacha20_poly1305_decrypt
+/* */ (gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf,
+ size_t length);
+
+
+/*-- cipher-ocb.c --*/
+gcry_err_code_t _gcry_cipher_ocb_encrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_ocb_decrypt
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_ocb_set_nonce
+/* */ (gcry_cipher_hd_t c, const unsigned char *nonce,
+ size_t noncelen);
+gcry_err_code_t _gcry_cipher_ocb_authenticate
+/* */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t abuflen);
+gcry_err_code_t _gcry_cipher_ocb_get_tag
+/* */ (gcry_cipher_hd_t c,
+ unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_ocb_check_tag
+/* */ (gcry_cipher_hd_t c,
+ const unsigned char *intag, size_t taglen);
+void _gcry_cipher_ocb_setkey
+/* */ (gcry_cipher_hd_t c);
+
+
+/*-- cipher-xts.c --*/
+gcry_err_code_t _gcry_cipher_xts_encrypt
+/* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_xts_decrypt
+/* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen);
+
+
+/* Return the L-value for block N.  Note: 'cipher-ocb.c' ensures that N
+ * will never be a multiple of 65536 (1 << OCB_L_TABLE_SIZE), thus N can
+ * be passed directly to the _gcry_ctz() function and the resulting
+ * index will never overflow the table. */
+static inline const unsigned char *
+ocb_get_l (gcry_cipher_hd_t c, u64 n)
+{
+ unsigned long ntz;
+
+#if ((defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 4)
+ /* Assumes that N != 0. */
+ asm ("rep;bsfl %k[low], %k[ntz]\n\t"
+ : [ntz] "=r" (ntz)
+ : [low] "r" ((unsigned long)n)
+ : "cc");
+#else
+ ntz = _gcry_ctz (n);
+#endif
+
+ return c->u_mode.ocb.L[ntz];
+}
+
+
+/* Return bit-shift of blocksize. */
+static inline unsigned int _gcry_blocksize_shift(gcry_cipher_hd_t c)
+{
+  /* Only blocksizes 8 and 16 are used.  Return the value in such a way
+   * that the compiler can optimize calling functions based on it. */
+ return c->spec->blocksize == 8 ? 3 : 4;
+}
+
+
+/* Optimized function for adding value to cipher block. */
+static inline void
+cipher_block_add(void *_dstsrc, unsigned int add, size_t blocksize)
+{
+ byte *dstsrc = _dstsrc;
+ u64 s[2];
+
+ if (blocksize == 8)
+ {
+ buf_put_be64(dstsrc + 0, buf_get_be64(dstsrc + 0) + add);
+ }
+ else /* blocksize == 16 */
+ {
+ s[0] = buf_get_be64(dstsrc + 8);
+ s[1] = buf_get_be64(dstsrc + 0);
+ s[0] += add;
+ s[1] += (s[0] < add);
+ buf_put_be64(dstsrc + 8, s[0]);
+ buf_put_be64(dstsrc + 0, s[1]);
+ }
+}
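+
+/* For the 16 byte case the function above is plain big-endian 128-bit
+ * addition of ADD to the block.  A byte-wise sketch of the same
+ * semantics; the helper is hypothetical and kept out of the build.  */
+#if 0 /* illustrative sketch */
+static inline void
+cipher_block16_add_be_ref (unsigned char *block, unsigned int add)
+{
+  u64 carry = add;
+  int i;
+
+  /* Propagate the addition from the least significant (last) byte. */
+  for (i = 15; i >= 0 && carry; i--)
+    {
+      carry += block[i];
+      block[i] = carry & 0xff;
+      carry >>= 8;
+    }
+}
+#endif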
+
+
+/* Optimized function for cipher block copying */
+static inline void
+cipher_block_cpy(void *_dst, const void *_src, size_t blocksize)
+{
+ byte *dst = _dst;
+ const byte *src = _src;
+ u64 s[2];
+
+ if (blocksize == 8)
+ {
+ buf_put_he64(dst + 0, buf_get_he64(src + 0));
+ }
+ else /* blocksize == 16 */
+ {
+ s[0] = buf_get_he64(src + 0);
+ s[1] = buf_get_he64(src + 8);
+ buf_put_he64(dst + 0, s[0]);
+ buf_put_he64(dst + 8, s[1]);
+ }
+}
+
+
+/* Optimized function for cipher block xoring */
+static inline void
+cipher_block_xor(void *_dst, const void *_src1, const void *_src2,
+ size_t blocksize)
+{
+ byte *dst = _dst;
+ const byte *src1 = _src1;
+ const byte *src2 = _src2;
+ u64 s1[2];
+ u64 s2[2];
+
+ if (blocksize == 8)
+ {
+ buf_put_he64(dst + 0, buf_get_he64(src1 + 0) ^ buf_get_he64(src2 + 0));
+ }
+ else /* blocksize == 16 */
+ {
+ s1[0] = buf_get_he64(src1 + 0);
+ s1[1] = buf_get_he64(src1 + 8);
+ s2[0] = buf_get_he64(src2 + 0);
+ s2[1] = buf_get_he64(src2 + 8);
+ buf_put_he64(dst + 0, s1[0] ^ s2[0]);
+ buf_put_he64(dst + 8, s1[1] ^ s2[1]);
+ }
+}
+
+
+/* Optimized function for in-place cipher block xoring */
+static inline void
+cipher_block_xor_1(void *_dst, const void *_src, size_t blocksize)
+{
+ cipher_block_xor (_dst, _dst, _src, blocksize);
+}
+
+
+/* Optimized function for cipher block xoring with two destination cipher
+ blocks. Used mainly by CFB mode encryption. */
+static inline void
+cipher_block_xor_2dst(void *_dst1, void *_dst2, const void *_src,
+ size_t blocksize)
+{
+ byte *dst1 = _dst1;
+ byte *dst2 = _dst2;
+ const byte *src = _src;
+ u64 d2[2];
+ u64 s[2];
+
+ if (blocksize == 8)
+ {
+ d2[0] = buf_get_he64(dst2 + 0) ^ buf_get_he64(src + 0);
+ buf_put_he64(dst2 + 0, d2[0]);
+ buf_put_he64(dst1 + 0, d2[0]);
+ }
+ else /* blocksize == 16 */
+ {
+ s[0] = buf_get_he64(src + 0);
+ s[1] = buf_get_he64(src + 8);
+ d2[0] = buf_get_he64(dst2 + 0);
+ d2[1] = buf_get_he64(dst2 + 8);
+ d2[0] = d2[0] ^ s[0];
+ d2[1] = d2[1] ^ s[1];
+ buf_put_he64(dst2 + 0, d2[0]);
+ buf_put_he64(dst2 + 8, d2[1]);
+ buf_put_he64(dst1 + 0, d2[0]);
+ buf_put_he64(dst1 + 8, d2[1]);
+ }
+}
+
+
+/* Optimized function for combined cipher block xoring and copying.
+   Used mainly by CBC mode decryption. */
+static inline void
+cipher_block_xor_n_copy_2(void *_dst_xor, const void *_src_xor,
+ void *_srcdst_cpy, const void *_src_cpy,
+ size_t blocksize)
+{
+ byte *dst_xor = _dst_xor;
+ byte *srcdst_cpy = _srcdst_cpy;
+ const byte *src_xor = _src_xor;
+ const byte *src_cpy = _src_cpy;
+ u64 sc[2];
+ u64 sx[2];
+ u64 sdc[2];
+
+ if (blocksize == 8)
+ {
+ sc[0] = buf_get_he64(src_cpy + 0);
+ buf_put_he64(dst_xor + 0,
+ buf_get_he64(srcdst_cpy + 0) ^ buf_get_he64(src_xor + 0));
+ buf_put_he64(srcdst_cpy + 0, sc[0]);
+ }
+ else /* blocksize == 16 */
+ {
+ sc[0] = buf_get_he64(src_cpy + 0);
+ sc[1] = buf_get_he64(src_cpy + 8);
+ sx[0] = buf_get_he64(src_xor + 0);
+ sx[1] = buf_get_he64(src_xor + 8);
+ sdc[0] = buf_get_he64(srcdst_cpy + 0);
+ sdc[1] = buf_get_he64(srcdst_cpy + 8);
+ sx[0] ^= sdc[0];
+ sx[1] ^= sdc[1];
+ buf_put_he64(dst_xor + 0, sx[0]);
+ buf_put_he64(dst_xor + 8, sx[1]);
+ buf_put_he64(srcdst_cpy + 0, sc[0]);
+ buf_put_he64(srcdst_cpy + 8, sc[1]);
+ }
+}
+
+
+/* Optimized function for combined cipher block xoring and copying.
+   Used mainly by CFB mode decryption. */
+static inline void
+cipher_block_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src,
+ size_t blocksize)
+{
+ cipher_block_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, blocksize);
+}
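+
+/* A sketch of how CFB decryption typically drives the helper above: the
+ * IV buffer holds the keystream block E_K(previous IV); a single call
+ * produces the plaintext and at the same time stores the ciphertext
+ * block as the next IV.  The wrapper name is hypothetical and the code
+ * is kept out of the build.  */
+#if 0 /* illustrative sketch */
+static inline void
+cfb_dec_block_sketch (unsigned char *plain, unsigned char *iv,
+                      const unsigned char *cipher, size_t blocksize)
+{
+  /* plain = iv ^ cipher;  iv = cipher  -- done in one pass. */
+  cipher_block_xor_n_copy (plain, iv, cipher, blocksize);
+}
+#endif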
+
+
+#endif /*G10_CIPHER_INTERNAL_H*/
diff --git a/comm/third_party/libgcrypt/cipher/cipher-ocb.c b/comm/third_party/libgcrypt/cipher/cipher-ocb.c
new file mode 100644
index 0000000000..24db6a9e2c
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-ocb.c
@@ -0,0 +1,761 @@
+/* cipher-ocb.c - OCB cipher mode
+ * Copyright (C) 2015, 2016 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * OCB is covered by several patents but may be used freely by most
+ * software. See http://web.cs.ucdavis.edu/~rogaway/ocb/license.htm .
+ * In particular license 1 is suitable for Libgcrypt: See
+ * http://web.cs.ucdavis.edu/~rogaway/ocb/license1.pdf for the full
+ * license document; it basically says:
+ *
+ * License 1 — License for Open-Source Software Implementations of OCB
+ * (Jan 9, 2013)
+ *
+ * Under this license, you are authorized to make, use, and
+ * distribute open-source software implementations of OCB. This
+ * license terminates for you if you sue someone over their
+ * open-source software implementation of OCB claiming that you have
+ * a patent covering their implementation.
+ */
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+/* Double the OCB_BLOCK_LEN sized block B in-place. */
+static inline void
+double_block (u64 b[2])
+{
+ u64 l_0, l, r;
+
+ l = b[1];
+ r = b[0];
+
+ l_0 = -(l >> 63);
+ l = (l + l) ^ (r >> 63);
+ r = (r + r) ^ (l_0 & 135);
+
+ b[1] = l;
+ b[0] = r;
+}
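+
+/* The word-based doubling above is multiplication by x in GF(2^128)
+ * using OCB's reduction constant 135 (0x87): shift the 128-bit
+ * big-endian block left by one bit and, if the dropped MSB was set,
+ * xor 0x87 into the low byte.  A byte-wise sketch of the same
+ * operation; illustrative only and kept out of the build.  */
+#if 0 /* illustrative sketch */
+static void
+double_block_bytes_ref (unsigned char b[16])
+{
+  unsigned char carry = b[0] >> 7;  /* MSB of the 128-bit value */
+  int i;
+
+  for (i = 0; i < 15; i++)
+    b[i] = (b[i] << 1) | (b[i + 1] >> 7);
+  b[15] = (b[15] << 1) ^ (carry ? 0x87 : 0);
+}
+#endif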
+
+
+/* Copy OCB_BLOCK_LEN bytes from buffer S, starting at bit offset
+ * BITOFF, to buffer D. */
+static void
+bit_copy (unsigned char d[16], const unsigned char s[24], unsigned int bitoff)
+{
+ u64 s0l, s1l, s1r, s2r;
+ unsigned int shift;
+ unsigned int byteoff;
+
+ byteoff = bitoff / 8;
+ shift = bitoff % 8;
+
+ s0l = buf_get_be64 (s + byteoff + 0);
+ s1l = buf_get_be64 (s + byteoff + 8);
+ s1r = shift ? s1l : 0;
+ s2r = shift ? buf_get_be64 (s + 16) << (8 * byteoff) : 0;
+
+ buf_put_be64 (d + 0, (s0l << shift) | (s1r >> ((64 - shift) & 63)));
+ buf_put_be64 (d + 8, (s1l << shift) | (s2r >> ((64 - shift) & 63)));
+}
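+
+/* bit_copy() extracts 128 bits of S starting at bit offset BITOFF
+ * (0..63); this is the "Offset_0 = Stretch[1+bottom..128+bottom]" step
+ * of OCB nonce setup below.  A simple byte-wise sketch of the same
+ * extraction; illustrative only and kept out of the build.  */
+#if 0 /* illustrative sketch */
+static void
+bit_copy_bytes_ref (unsigned char d[16], const unsigned char s[24],
+                    unsigned int bitoff)
+{
+  unsigned int byteoff = bitoff / 8;
+  unsigned int shift = bitoff % 8;
+  int i;
+
+  for (i = 0; i < 16; i++)
+    {
+      unsigned int v = ((unsigned int)s[byteoff + i] << 8)
+                       | s[byteoff + i + 1];
+      d[i] = (v >> (8 - shift)) & 0xff;
+    }
+}
+#endif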
+
+
+/* Get L_big value for block N, where N is multiple of 65536. */
+static void
+ocb_get_L_big (gcry_cipher_hd_t c, u64 n, unsigned char *l_buf)
+{
+ int ntz = _gcry_ctz64 (n);
+ u64 L[2];
+
+ gcry_assert(ntz >= OCB_L_TABLE_SIZE);
+
+ L[1] = buf_get_be64 (c->u_mode.ocb.L[OCB_L_TABLE_SIZE - 1]);
+ L[0] = buf_get_be64 (c->u_mode.ocb.L[OCB_L_TABLE_SIZE - 1] + 8);
+
+ for (ntz -= OCB_L_TABLE_SIZE - 1; ntz; ntz--)
+ double_block (L);
+
+ buf_put_be64 (l_buf + 0, L[1]);
+ buf_put_be64 (l_buf + 8, L[0]);
+}
+
+
+/* Called after key has been set. Sets up L table. */
+void _gcry_cipher_ocb_setkey (gcry_cipher_hd_t c)
+{
+ unsigned char ktop[OCB_BLOCK_LEN];
+ unsigned int burn = 0;
+ unsigned int nburn;
+ u64 L[2];
+ int i;
+
+ /* L_star = E(zero_128) */
+ memset (ktop, 0, OCB_BLOCK_LEN);
+ nburn = c->spec->encrypt (&c->context.c, c->u_mode.ocb.L_star, ktop);
+ burn = nburn > burn ? nburn : burn;
+ /* L_dollar = double(L_star) */
+ L[1] = buf_get_be64 (c->u_mode.ocb.L_star);
+ L[0] = buf_get_be64 (c->u_mode.ocb.L_star + 8);
+ double_block (L);
+ buf_put_be64 (c->u_mode.ocb.L_dollar + 0, L[1]);
+ buf_put_be64 (c->u_mode.ocb.L_dollar + 8, L[0]);
+ /* L_0 = double(L_dollar), ... */
+ double_block (L);
+ buf_put_be64 (c->u_mode.ocb.L[0] + 0, L[1]);
+ buf_put_be64 (c->u_mode.ocb.L[0] + 8, L[0]);
+ for (i = 1; i < OCB_L_TABLE_SIZE; i++)
+ {
+ double_block (L);
+ buf_put_be64 (c->u_mode.ocb.L[i] + 0, L[1]);
+ buf_put_be64 (c->u_mode.ocb.L[i] + 8, L[0]);
+ }
+ /* Precalculated offset L0+L1 */
+ cipher_block_xor (c->u_mode.ocb.L0L1,
+ c->u_mode.ocb.L[0], c->u_mode.ocb.L[1], OCB_BLOCK_LEN);
+
+ /* Cleanup */
+ wipememory (ktop, sizeof ktop);
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4*sizeof(void*));
+}
+
+
+/* Set the nonce for OCB. This requires that the key has been set.
+   Using it again starts a new encryption cycle using the same
+ key. */
+gcry_err_code_t
+_gcry_cipher_ocb_set_nonce (gcry_cipher_hd_t c, const unsigned char *nonce,
+ size_t noncelen)
+{
+ unsigned char ktop[OCB_BLOCK_LEN];
+ unsigned char stretch[OCB_BLOCK_LEN + 8];
+ unsigned int bottom;
+ unsigned int burn = 0;
+ unsigned int nburn;
+
+ /* Check args. */
+ if (!c->marks.key)
+ return GPG_ERR_INV_STATE; /* Key must have been set first. */
+ switch (c->u_mode.ocb.taglen)
+ {
+ case 8:
+ case 12:
+ case 16:
+ break;
+ default:
+ return GPG_ERR_BUG; /* Invalid tag length. */
+ }
+
+ if (c->spec->blocksize != OCB_BLOCK_LEN)
+ return GPG_ERR_CIPHER_ALGO;
+ if (!nonce)
+ return GPG_ERR_INV_ARG;
+ /* 120 bit is the allowed maximum. In addition we impose a minimum
+ of 64 bit. */
+ if (noncelen > (120/8) || noncelen < (64/8) || noncelen >= OCB_BLOCK_LEN)
+ return GPG_ERR_INV_LENGTH;
+
+ /* Prepare the nonce. */
+ memset (ktop, 0, OCB_BLOCK_LEN);
+ buf_cpy (ktop + (OCB_BLOCK_LEN - noncelen), nonce, noncelen);
+ ktop[0] = ((c->u_mode.ocb.taglen * 8) % 128) << 1;
+ ktop[OCB_BLOCK_LEN - noncelen - 1] |= 1;
+ bottom = ktop[OCB_BLOCK_LEN - 1] & 0x3f;
+ ktop[OCB_BLOCK_LEN - 1] &= 0xc0; /* Zero the bottom bits. */
+ nburn = c->spec->encrypt (&c->context.c, ktop, ktop);
+ burn = nburn > burn ? nburn : burn;
+ /* Stretch = Ktop || (Ktop[1..64] xor Ktop[9..72]) */
+ cipher_block_cpy (stretch, ktop, OCB_BLOCK_LEN);
+ cipher_block_xor (stretch + OCB_BLOCK_LEN, ktop, ktop + 1, 8);
+ /* Offset_0 = Stretch[1+bottom..128+bottom]
+ (We use the IV field to store the offset) */
+ bit_copy (c->u_iv.iv, stretch, bottom);
+ c->marks.iv = 1;
+
+ /* Checksum_0 = zeros(128)
+ (We use the CTR field to store the checksum) */
+ memset (c->u_ctr.ctr, 0, OCB_BLOCK_LEN);
+
+ /* Clear AAD buffer. */
+ memset (c->u_mode.ocb.aad_offset, 0, OCB_BLOCK_LEN);
+ memset (c->u_mode.ocb.aad_sum, 0, OCB_BLOCK_LEN);
+
+ /* Setup other values. */
+ memset (c->lastiv, 0, sizeof(c->lastiv));
+ c->unused = 0;
+ c->marks.tag = 0;
+ c->marks.finalize = 0;
+ c->u_mode.ocb.data_nblocks = 0;
+ c->u_mode.ocb.aad_nblocks = 0;
+ c->u_mode.ocb.aad_nleftover = 0;
+ c->u_mode.ocb.data_finalized = 0;
+ c->u_mode.ocb.aad_finalized = 0;
+
+ /* log_printhex ("L_* ", c->u_mode.ocb.L_star, OCB_BLOCK_LEN); */
+ /* log_printhex ("L_$ ", c->u_mode.ocb.L_dollar, OCB_BLOCK_LEN); */
+ /* log_printhex ("L_0 ", c->u_mode.ocb.L[0], OCB_BLOCK_LEN); */
+ /* log_printhex ("L_1 ", c->u_mode.ocb.L[1], OCB_BLOCK_LEN); */
+ /* log_debug ( "bottom : %u (decimal)\n", bottom); */
+ /* log_printhex ("Ktop ", ktop, OCB_BLOCK_LEN); */
+ /* log_printhex ("Stretch ", stretch, sizeof stretch); */
+ /* log_printhex ("Offset_0 ", c->u_iv.iv, OCB_BLOCK_LEN); */
+
+ /* Cleanup */
+ wipememory (ktop, sizeof ktop);
+ wipememory (stretch, sizeof stretch);
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4*sizeof(void*));
+
+ return 0;
+}
+
+
+/* Process additional authentication data.  This implementation allows
+   adding additional authentication data at any time before the final
+ gcry_cipher_gettag. */
+gcry_err_code_t
+_gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
+ size_t abuflen)
+{
+ const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+ const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1);
+ unsigned char l_tmp[OCB_BLOCK_LEN];
+ unsigned int burn = 0;
+ unsigned int nburn;
+ size_t n;
+
+ /* Check that a nonce and thus a key has been set and that we have
+ not yet computed the tag. We also return an error if the aad has
+ been finalized (i.e. a short block has been processed). */
+ if (!c->marks.iv || c->marks.tag || c->u_mode.ocb.aad_finalized)
+ return GPG_ERR_INV_STATE;
+
+ /* Check correct usage and arguments. */
+ if (c->spec->blocksize != OCB_BLOCK_LEN)
+ return GPG_ERR_CIPHER_ALGO;
+
+ /* Process remaining data from the last call first. */
+ if (c->u_mode.ocb.aad_nleftover)
+ {
+ n = abuflen;
+ if (n > OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover)
+ n = OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover;
+
+ buf_cpy (&c->u_mode.ocb.aad_leftover[c->u_mode.ocb.aad_nleftover],
+ abuf, n);
+ c->u_mode.ocb.aad_nleftover += n;
+ abuf += n;
+ abuflen -= n;
+
+ if (c->u_mode.ocb.aad_nleftover == OCB_BLOCK_LEN)
+ {
+ c->u_mode.ocb.aad_nblocks++;
+
+ if ((c->u_mode.ocb.aad_nblocks % table_maxblks) == 0)
+ {
+ /* Table overflow, L needs to be generated. */
+ ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks + 1, l_tmp);
+ }
+ else
+ {
+ cipher_block_cpy (l_tmp, ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+ OCB_BLOCK_LEN);
+ }
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_leftover, OCB_BLOCK_LEN);
+ nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+ burn = nburn > burn ? nburn : burn;
+ cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+ c->u_mode.ocb.aad_nleftover = 0;
+ }
+ }
+
+ if (!abuflen)
+ {
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4*sizeof(void*));
+
+ return 0;
+ }
+
+ /* Full blocks handling. */
+ while (abuflen >= OCB_BLOCK_LEN)
+ {
+ size_t nblks = abuflen / OCB_BLOCK_LEN;
+ size_t nmaxblks;
+
+ /* Check how many blocks to process till table overflow. */
+ nmaxblks = (c->u_mode.ocb.aad_nblocks + 1) % table_maxblks;
+ nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;
+
+ if (nmaxblks == 0)
+ {
+ /* Table overflow, generate L and process one block. */
+ c->u_mode.ocb.aad_nblocks++;
+ ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks, l_tmp);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf,
+ OCB_BLOCK_LEN);
+ nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+ burn = nburn > burn ? nburn : burn;
+ cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+ abuf += OCB_BLOCK_LEN;
+ abuflen -= OCB_BLOCK_LEN;
+ nblks--;
+
+ /* With overflow handled, retry loop again. Next overflow will
+ * happen after 65535 blocks. */
+ continue;
+ }
+
+ nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+ /* Use a bulk method if available. */
+ if (nblks && c->bulk.ocb_auth)
+ {
+ size_t nleft;
+ size_t ndone;
+
+ nleft = c->bulk.ocb_auth (c, abuf, nblks);
+ ndone = nblks - nleft;
+
+ abuf += ndone * OCB_BLOCK_LEN;
+ abuflen -= ndone * OCB_BLOCK_LEN;
+ nblks = nleft;
+ }
+
+ /* Hash all full blocks. */
+ while (nblks)
+ {
+ c->u_mode.ocb.aad_nblocks++;
+
+ gcry_assert(c->u_mode.ocb.aad_nblocks & table_size_mask);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ cipher_block_xor_1 (c->u_mode.ocb.aad_offset,
+ ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+ OCB_BLOCK_LEN);
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf,
+ OCB_BLOCK_LEN);
+ nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+ burn = nburn > burn ? nburn : burn;
+ cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+ abuf += OCB_BLOCK_LEN;
+ abuflen -= OCB_BLOCK_LEN;
+ nblks--;
+ }
+ }
+
+ /* Store away the remaining data. */
+ if (abuflen)
+ {
+ n = abuflen;
+ if (n > OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover)
+ n = OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover;
+
+ buf_cpy (&c->u_mode.ocb.aad_leftover[c->u_mode.ocb.aad_nleftover],
+ abuf, n);
+ c->u_mode.ocb.aad_nleftover += n;
+ abuf += n;
+ abuflen -= n;
+ }
+
+ gcry_assert (!abuflen);
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4*sizeof(void*));
+
+ return 0;
+}
+
+
+/* Hash final partial AAD block. */
+static void
+ocb_aad_finalize (gcry_cipher_hd_t c)
+{
+ unsigned char l_tmp[OCB_BLOCK_LEN];
+ unsigned int burn = 0;
+ unsigned int nburn;
+
+ /* Check that a nonce and thus a key has been set and that we have
+ not yet computed the tag. We also skip this if the aad has been
+ finalized. */
+ if (!c->marks.iv || c->marks.tag || c->u_mode.ocb.aad_finalized)
+ return;
+ if (c->spec->blocksize != OCB_BLOCK_LEN)
+ return; /* Ooops. */
+
+ /* Hash final partial block if any. */
+ if (c->u_mode.ocb.aad_nleftover)
+ {
+ /* Offset_* = Offset_m xor L_* */
+ cipher_block_xor_1 (c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
+ /* CipherInput = (A_* || 1 || zeros(127-bitlen(A_*))) xor Offset_* */
+ buf_cpy (l_tmp, c->u_mode.ocb.aad_leftover, c->u_mode.ocb.aad_nleftover);
+ memset (l_tmp + c->u_mode.ocb.aad_nleftover, 0,
+ OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover);
+ l_tmp[c->u_mode.ocb.aad_nleftover] = 0x80;
+ cipher_block_xor_1 (l_tmp, c->u_mode.ocb.aad_offset, OCB_BLOCK_LEN);
+ /* Sum = Sum_m xor ENCIPHER(K, CipherInput) */
+ nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+ burn = nburn > burn ? nburn : burn;
+ cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+ c->u_mode.ocb.aad_nleftover = 0;
+ }
+
+ /* Mark AAD as finalized so that gcry_cipher_ocb_authenticate can
+   * return an error when called again.  */
+ c->u_mode.ocb.aad_finalized = 1;
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4*sizeof(void*));
+}
+
+
+
+/* Checksumming for encrypt and decrypt. */
+static void
+ocb_checksum (unsigned char *chksum, const unsigned char *plainbuf,
+ size_t nblks)
+{
+ while (nblks > 0)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ cipher_block_xor_1(chksum, plainbuf, OCB_BLOCK_LEN);
+
+ plainbuf += OCB_BLOCK_LEN;
+ nblks--;
+ }
+}
+
+
+/* Common code for encrypt and decrypt. */
+static gcry_err_code_t
+ocb_crypt (gcry_cipher_hd_t c, int encrypt,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+ const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1);
+ unsigned char l_tmp[OCB_BLOCK_LEN];
+ unsigned int burn = 0;
+ unsigned int nburn;
+ gcry_cipher_encrypt_t crypt_fn =
+ encrypt ? c->spec->encrypt : c->spec->decrypt;
+
+ /* Check that a nonce and thus a key has been set and that we are
+ not yet in end of data state. */
+ if (!c->marks.iv || c->u_mode.ocb.data_finalized)
+ return GPG_ERR_INV_STATE;
+
+ /* Check correct usage and arguments. */
+ if (c->spec->blocksize != OCB_BLOCK_LEN)
+ return GPG_ERR_CIPHER_ALGO;
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ if (c->marks.finalize)
+    ; /* Allow arbitrary length. */
+ else if ((inbuflen % OCB_BLOCK_LEN))
+ return GPG_ERR_INV_LENGTH; /* We support only full blocks for now. */
+
+ /* Full blocks handling. */
+ while (inbuflen >= OCB_BLOCK_LEN)
+ {
+ size_t nblks = inbuflen / OCB_BLOCK_LEN;
+ size_t nmaxblks;
+
+ /* Check how many blocks to process till table overflow. */
+ nmaxblks = (c->u_mode.ocb.data_nblocks + 1) % table_maxblks;
+ nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;
+
+ if (nmaxblks == 0)
+ {
+ /* Table overflow, generate L and process one block. */
+ c->u_mode.ocb.data_nblocks++;
+ ocb_get_L_big(c, c->u_mode.ocb.data_nblocks, l_tmp);
+
+ if (encrypt)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ocb_checksum (c->u_ctr.ctr, inbuf, 1);
+ }
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ cipher_block_xor_1 (c->u_iv.iv, l_tmp, OCB_BLOCK_LEN);
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ cipher_block_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+ nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+ burn = nburn > burn ? nburn : burn;
+ cipher_block_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+
+ if (!encrypt)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ocb_checksum (c->u_ctr.ctr, outbuf, 1);
+ }
+
+ inbuf += OCB_BLOCK_LEN;
+ inbuflen -= OCB_BLOCK_LEN;
+ outbuf += OCB_BLOCK_LEN;
+          outbuflen -= OCB_BLOCK_LEN;
+ nblks--;
+
+ /* With overflow handled, retry loop again. Next overflow will
+ * happen after 65535 blocks. */
+ continue;
+ }
+
+ nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+ /* Since checksum xoring is done before/after encryption/decryption,
+ process input in 24KiB chunks to keep data loaded in L1 cache for
+ checksumming. */
+ if (nblks > 24 * 1024 / OCB_BLOCK_LEN)
+ nblks = 24 * 1024 / OCB_BLOCK_LEN;
+
+ /* Use a bulk method if available. */
+ if (nblks && c->bulk.ocb_crypt)
+ {
+ size_t nleft;
+ size_t ndone;
+
+ nleft = c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt);
+ ndone = nblks - nleft;
+
+ inbuf += ndone * OCB_BLOCK_LEN;
+ outbuf += ndone * OCB_BLOCK_LEN;
+ inbuflen -= ndone * OCB_BLOCK_LEN;
+ outbuflen -= ndone * OCB_BLOCK_LEN;
+ nblks = nleft;
+ }
+
+ if (nblks)
+ {
+ size_t nblks_chksum = nblks;
+
+ if (encrypt)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ocb_checksum (c->u_ctr.ctr, inbuf, nblks_chksum);
+ }
+
+ /* Encrypt all full blocks. */
+ while (nblks)
+ {
+ c->u_mode.ocb.data_nblocks++;
+
+ gcry_assert(c->u_mode.ocb.data_nblocks & table_size_mask);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ cipher_block_xor_1 (c->u_iv.iv,
+ ocb_get_l (c, c->u_mode.ocb.data_nblocks),
+ OCB_BLOCK_LEN);
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ cipher_block_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+ nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+ burn = nburn > burn ? nburn : burn;
+ cipher_block_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+
+ inbuf += OCB_BLOCK_LEN;
+ inbuflen -= OCB_BLOCK_LEN;
+ outbuf += OCB_BLOCK_LEN;
+              outbuflen -= OCB_BLOCK_LEN;
+ nblks--;
+ }
+
+ if (!encrypt)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ocb_checksum (c->u_ctr.ctr,
+ outbuf - nblks_chksum * OCB_BLOCK_LEN,
+ nblks_chksum);
+ }
+ }
+ }
+
+ /* Encrypt final partial block. Note that we expect INBUFLEN to be
+ shorter than OCB_BLOCK_LEN (see above). */
+ if (inbuflen)
+ {
+ unsigned char pad[OCB_BLOCK_LEN];
+
+ /* Offset_* = Offset_m xor L_* */
+ cipher_block_xor_1 (c->u_iv.iv, c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
+ /* Pad = ENCIPHER(K, Offset_*) */
+ nburn = c->spec->encrypt (&c->context.c, pad, c->u_iv.iv);
+ burn = nburn > burn ? nburn : burn;
+
+ if (encrypt)
+ {
+ /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */
+ /* Note that INBUFLEN is less than OCB_BLOCK_LEN. */
+ buf_cpy (l_tmp, inbuf, inbuflen);
+ memset (l_tmp + inbuflen, 0, OCB_BLOCK_LEN - inbuflen);
+ l_tmp[inbuflen] = 0x80;
+ cipher_block_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
+ /* C_* = P_* xor Pad[1..bitlen(P_*)] */
+ buf_xor (outbuf, inbuf, pad, inbuflen);
+ }
+ else
+ {
+ /* P_* = C_* xor Pad[1..bitlen(C_*)] */
+ /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */
+ cipher_block_cpy (l_tmp, pad, OCB_BLOCK_LEN);
+ buf_cpy (l_tmp, inbuf, inbuflen);
+ cipher_block_xor_1 (l_tmp, pad, OCB_BLOCK_LEN);
+ l_tmp[inbuflen] = 0x80;
+ buf_cpy (outbuf, l_tmp, inbuflen);
+
+ cipher_block_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
+ }
+ }
+
+ /* Compute the tag if the finalize flag has been set. */
+ if (c->marks.finalize)
+ {
+ /* Tag = ENCIPHER(K, Checksum xor Offset xor L_$) xor HASH(K,A) */
+ cipher_block_xor (c->u_mode.ocb.tag, c->u_ctr.ctr, c->u_iv.iv,
+ OCB_BLOCK_LEN);
+ cipher_block_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.L_dollar,
+ OCB_BLOCK_LEN);
+ nburn = c->spec->encrypt (&c->context.c,
+ c->u_mode.ocb.tag, c->u_mode.ocb.tag);
+ burn = nburn > burn ? nburn : burn;
+
+ c->u_mode.ocb.data_finalized = 1;
+      /* Note that the final part of the tag computation is done
+ by _gcry_cipher_ocb_get_tag. */
+ }
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4*sizeof(void*));
+
+ return 0;
+}
+
+
+/* Encrypt (INBUF,INBUFLEN) in OCB mode to OUTBUF. OUTBUFLEN gives
+ the allocated size of OUTBUF. This function accepts only multiples
+ of a full block unless gcry_cipher_final has been called in which
+ case the next block may have any length. */
+gcry_err_code_t
+_gcry_cipher_ocb_encrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+
+{
+ return ocb_crypt (c, 1, outbuf, outbuflen, inbuf, inbuflen);
+}
+
+
+/* Decrypt (INBUF,INBUFLEN) in OCB mode to OUTBUF. OUTBUFLEN gives
+ the allocated size of OUTBUF. This function accepts only multiples
+ of a full block unless gcry_cipher_final has been called in which
+ case the next block may have any length. */
+gcry_err_code_t
+_gcry_cipher_ocb_decrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ return ocb_crypt (c, 0, outbuf, outbuflen, inbuf, inbuflen);
+}
+
+
+/* Compute the tag. The last data operation has already done some
+ part of it. To allow adding AAD even after having done all data,
+ we finish the tag computation only here. */
+static void
+compute_tag_if_needed (gcry_cipher_hd_t c)
+{
+ if (!c->marks.tag)
+ {
+ ocb_aad_finalize (c);
+ cipher_block_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.aad_sum,
+ OCB_BLOCK_LEN);
+ c->marks.tag = 1;
+ }
+}
+
+
+/* Copy the already computed tag to OUTTAG. OUTTAGSIZE is the
+ allocated size of OUTTAG; the function returns an error if that is
+ too short to hold the tag. */
+gcry_err_code_t
+_gcry_cipher_ocb_get_tag (gcry_cipher_hd_t c,
+ unsigned char *outtag, size_t outtagsize)
+{
+ if (c->u_mode.ocb.taglen > outtagsize)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ if (!c->u_mode.ocb.data_finalized)
+ return GPG_ERR_INV_STATE; /* Data has not yet been finalized. */
+
+ compute_tag_if_needed (c);
+
+ memcpy (outtag, c->u_mode.ocb.tag, c->u_mode.ocb.taglen);
+
+ return 0;
+}
+
+
+/* Check that the tag (INTAG,TAGLEN) matches the computed tag for the
+ handle C. */
+gcry_err_code_t
+_gcry_cipher_ocb_check_tag (gcry_cipher_hd_t c, const unsigned char *intag,
+ size_t taglen)
+{
+ size_t n;
+
+ if (!c->u_mode.ocb.data_finalized)
+ return GPG_ERR_INV_STATE; /* Data has not yet been finalized. */
+
+ compute_tag_if_needed (c);
+
+ n = c->u_mode.ocb.taglen;
+ if (taglen < n)
+ n = taglen;
+
+ if (!buf_eq_const (intag, c->u_mode.ocb.tag, n)
+ || c->u_mode.ocb.taglen != taglen)
+ return GPG_ERR_CHECKSUM;
+
+ return 0;
+}
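+
+/* As with the other AEAD modes, these functions are reached through the
+ * public API: gcry_cipher_open with GCRY_CIPHER_MODE_OCB, the nonce set
+ * via gcry_cipher_setiv, and gcry_cipher_final before the last chunk so
+ * that a trailing partial block is accepted (see ocb_crypt above).  A
+ * minimal caller-side sketch with placeholder key and nonce, error
+ * handling omitted, kept out of the build.  */
+#if 0 /* usage sketch */
+static void
+ocb_usage_sketch (void)
+{
+  gcry_cipher_hd_t hd;
+  unsigned char key[16] = { 0 };    /* placeholder key */
+  unsigned char nonce[12] = { 0 };  /* placeholder 96-bit nonce */
+  unsigned char buf[20] = { 0 };    /* deliberately not a block multiple */
+  unsigned char tag[16];
+
+  gcry_cipher_open (&hd, GCRY_CIPHER_AES128, GCRY_CIPHER_MODE_OCB, 0);
+  gcry_cipher_setkey (hd, key, sizeof key);
+  gcry_cipher_setiv (hd, nonce, sizeof nonce);
+  gcry_cipher_final (hd);           /* this is the final (partial) chunk */
+  gcry_cipher_encrypt (hd, buf, sizeof buf, NULL, 0);
+  gcry_cipher_gettag (hd, tag, sizeof tag);
+  gcry_cipher_close (hd);
+}
+#endif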
diff --git a/comm/third_party/libgcrypt/cipher/cipher-ofb.c b/comm/third_party/libgcrypt/cipher/cipher-ofb.c
new file mode 100644
index 0000000000..09db397e65
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-ofb.c
@@ -0,0 +1,108 @@
+/* cipher-ofb.c - Generic OFB mode implementation
+ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
+ * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+gcry_err_code_t
+_gcry_cipher_ofb_encrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ unsigned char *ivp;
+ gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+ size_t blocksize_shift = _gcry_blocksize_shift(c);
+ size_t blocksize = 1 << blocksize_shift;
+ unsigned int burn, nburn;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+
+ if ( inbuflen <= c->unused )
+ {
+ /* Short enough to be encoded by the remaining XOR mask. */
+ /* XOR the input with the IV */
+ ivp = c->u_iv.iv + blocksize - c->unused;
+ buf_xor(outbuf, ivp, inbuf, inbuflen);
+ c->unused -= inbuflen;
+ return 0;
+ }
+
+ burn = 0;
+
+ if( c->unused )
+ {
+ inbuflen -= c->unused;
+ ivp = c->u_iv.iv + blocksize - c->unused;
+ buf_xor(outbuf, ivp, inbuf, c->unused);
+ outbuf += c->unused;
+ inbuf += c->unused;
+ c->unused = 0;
+ }
+
+ /* Now we can process complete blocks. */
+ if (c->bulk.ofb_enc)
+ {
+ size_t nblocks = inbuflen >> blocksize_shift;
+ c->bulk.ofb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
+ outbuf += nblocks << blocksize_shift;
+ inbuf += nblocks << blocksize_shift;
+ inbuflen -= nblocks << blocksize_shift;
+ }
+ else
+ {
+ while ( inbuflen >= blocksize )
+ {
+ /* Encrypt the IV (and save the current one). */
+ nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ burn = nburn > burn ? nburn : burn;
+ cipher_block_xor(outbuf, c->u_iv.iv, inbuf, blocksize);
+ outbuf += blocksize;
+ inbuf += blocksize;
+ inbuflen -= blocksize;
+ }
+ }
+
+ if ( inbuflen )
+ { /* process the remaining bytes */
+ nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ burn = nburn > burn ? nburn : burn;
+ c->unused = blocksize;
+ c->unused -= inbuflen;
+ buf_xor(outbuf, c->u_iv.iv, inbuf, inbuflen);
+ outbuf += inbuflen;
+ inbuf += inbuflen;
+ inbuflen = 0;
+ }
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return 0;
+}
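
Because OFB only XORs the data with the keystream E(IV), E(E(IV)), ..., decryption is the same operation as encryption. A short round-trip sketch over the public API (illustrative; ofb_roundtrip_example is a hypothetical name, error checking omitted); the 5-byte message also exercises the partial-block path kept in c->unused above:

    #include <string.h>
    #include <gcrypt.h>

    static int
    ofb_roundtrip_example (void)
    {
      static const unsigned char key[16] = "0123456789abcdef";
      static const unsigned char iv[16]  = "fedcba9876543210";
      unsigned char msg[5] = { 'h', 'e', 'l', 'l', 'o' };
      unsigned char saved[5];
      gcry_cipher_hd_t hd;

      memcpy (saved, msg, sizeof msg);
      gcry_cipher_open (&hd, GCRY_CIPHER_AES128, GCRY_CIPHER_MODE_OFB, 0);
      gcry_cipher_setkey (hd, key, sizeof key);
      gcry_cipher_setiv (hd, iv, sizeof iv);
      gcry_cipher_encrypt (hd, msg, sizeof msg, NULL, 0);   /* in place */

      /* Restart the keystream and apply the same XOR to decrypt.  */
      gcry_cipher_setiv (hd, iv, sizeof iv);
      gcry_cipher_encrypt (hd, msg, sizeof msg, NULL, 0);
      gcry_cipher_close (hd);

      return memcmp (msg, saved, sizeof msg) == 0;          /* 1 on success */
    }
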
diff --git a/comm/third_party/libgcrypt/cipher/cipher-poly1305.c b/comm/third_party/libgcrypt/cipher/cipher-poly1305.c
new file mode 100644
index 0000000000..bb475236b8
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-poly1305.c
@@ -0,0 +1,375 @@
+/* cipher-poly1305.c - Poly1305 based AEAD cipher mode, RFC-8439
+ * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+#include "./poly1305-internal.h"
+
+
+static inline int
+poly1305_bytecounter_add (u32 ctr[2], size_t add)
+{
+ int overflow = 0;
+
+ if (sizeof(add) > sizeof(u32))
+ {
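+ /* On a 64-bit size_t this folds ADD >> 32 into the high counter word;
+ the shift is split in two so the expression stays valid when size_t
+ is only 32 bits wide. */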
+ u32 high_add = ((add >> 31) >> 1) & 0xffffffff;
+ ctr[1] += high_add;
+ if (ctr[1] < high_add)
+ overflow = 1;
+ }
+
+ ctr[0] += add;
+ if (ctr[0] >= add)
+ return overflow;
+
+ ctr[1] += 1;
+ return (ctr[1] < 1) || overflow;
+}
+
+
+static void
+poly1305_fill_bytecounts (gcry_cipher_hd_t c)
+{
+ u32 lenbuf[4];
+
+ lenbuf[0] = le_bswap32(c->u_mode.poly1305.aadcount[0]);
+ lenbuf[1] = le_bswap32(c->u_mode.poly1305.aadcount[1]);
+ lenbuf[2] = le_bswap32(c->u_mode.poly1305.datacount[0]);
+ lenbuf[3] = le_bswap32(c->u_mode.poly1305.datacount[1]);
+ _gcry_poly1305_update (&c->u_mode.poly1305.ctx, (byte*)lenbuf,
+ sizeof(lenbuf));
+
+ wipememory(lenbuf, sizeof(lenbuf));
+}
+
+
+static void
+poly1305_do_padding (gcry_cipher_hd_t c, u32 ctr[2])
+{
+ static const byte zero_padding_buf[15] = {};
+ u32 padding_count;
+
+ /* Padding to 16 byte boundary. */
+ if (ctr[0] % 16 > 0)
+ {
+ padding_count = 16 - ctr[0] % 16;
+
+ _gcry_poly1305_update (&c->u_mode.poly1305.ctx, zero_padding_buf,
+ padding_count);
+ }
+}
+
+
+static void
+poly1305_aad_finish (gcry_cipher_hd_t c)
+{
+ /* After AAD, feed padding bytes so we get 16 byte alignment. */
+ poly1305_do_padding (c, c->u_mode.poly1305.aadcount);
+
+ /* Start of encryption marks end of AAD stream. */
+ c->u_mode.poly1305.aad_finalized = 1;
+
+ c->u_mode.poly1305.datacount[0] = 0;
+ c->u_mode.poly1305.datacount[1] = 0;
+}
+
+
+static gcry_err_code_t
+poly1305_set_zeroiv (gcry_cipher_hd_t c)
+{
+ byte zero[8] = { 0, };
+
+ return _gcry_cipher_poly1305_setiv (c, zero, sizeof(zero));
+}
+
+
+gcry_err_code_t
+_gcry_cipher_poly1305_authenticate (gcry_cipher_hd_t c,
+ const byte * aadbuf, size_t aadbuflen)
+{
+ if (c->u_mode.poly1305.bytecount_over_limits)
+ return GPG_ERR_INV_LENGTH;
+ if (c->u_mode.poly1305.aad_finalized)
+ return GPG_ERR_INV_STATE;
+ if (c->marks.tag)
+ return GPG_ERR_INV_STATE;
+
+ if (!c->marks.iv)
+ poly1305_set_zeroiv(c);
+
+ if (poly1305_bytecounter_add(c->u_mode.poly1305.aadcount, aadbuflen))
+ {
+ c->u_mode.poly1305.bytecount_over_limits = 1;
+ return GPG_ERR_INV_LENGTH;
+ }
+
+ _gcry_poly1305_update (&c->u_mode.poly1305.ctx, aadbuf, aadbuflen);
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_poly1305_encrypt (gcry_cipher_hd_t c,
+ byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen)
+{
+ gcry_err_code_t err;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ if (c->marks.tag)
+ return GPG_ERR_INV_STATE;
+ if (c->u_mode.poly1305.bytecount_over_limits)
+ return GPG_ERR_INV_LENGTH;
+
+ if (!c->marks.iv)
+ {
+ err = poly1305_set_zeroiv(c);
+ if (err)
+ return err;
+ }
+
+ if (!c->u_mode.poly1305.aad_finalized)
+ poly1305_aad_finish(c);
+
+ if (poly1305_bytecounter_add(c->u_mode.poly1305.datacount, inbuflen))
+ {
+ c->u_mode.poly1305.bytecount_over_limits = 1;
+ return GPG_ERR_INV_LENGTH;
+ }
+
+ if (LIKELY(inbuflen > 0) && LIKELY(c->spec->algo == GCRY_CIPHER_CHACHA20))
+ {
+ return _gcry_chacha20_poly1305_encrypt (c, outbuf, inbuf, inbuflen);
+ }
+
+ while (inbuflen)
+ {
+ size_t currlen = inbuflen;
+
+ /* Since checksumming is done after encryption, process input in 24KiB
+ * chunks to keep data loaded in L1 cache for checksumming. */
+ if (currlen > 24 * 1024)
+ currlen = 24 * 1024;
+
+ c->spec->stencrypt(&c->context.c, outbuf, (byte*)inbuf, currlen);
+
+ _gcry_poly1305_update (&c->u_mode.poly1305.ctx, outbuf, currlen);
+
+ outbuf += currlen;
+ inbuf += currlen;
+ outbuflen -= currlen;
+ inbuflen -= currlen;
+ }
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_poly1305_decrypt (gcry_cipher_hd_t c,
+ byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen)
+{
+ gcry_err_code_t err;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ if (c->marks.tag)
+ return GPG_ERR_INV_STATE;
+ if (c->u_mode.poly1305.bytecount_over_limits)
+ return GPG_ERR_INV_LENGTH;
+
+ if (!c->marks.iv)
+ {
+ err = poly1305_set_zeroiv(c);
+ if (err)
+ return err;
+ }
+
+ if (!c->u_mode.poly1305.aad_finalized)
+ poly1305_aad_finish(c);
+
+ if (poly1305_bytecounter_add(c->u_mode.poly1305.datacount, inbuflen))
+ {
+ c->u_mode.poly1305.bytecount_over_limits = 1;
+ return GPG_ERR_INV_LENGTH;
+ }
+
+ if (LIKELY(inbuflen > 0) && LIKELY(c->spec->algo == GCRY_CIPHER_CHACHA20))
+ {
+ return _gcry_chacha20_poly1305_decrypt (c, outbuf, inbuf, inbuflen);
+ }
+
+ while (inbuflen)
+ {
+ size_t currlen = inbuflen;
+
+ /* Since checksumming is done before decryption, process input in 24KiB
+ * chunks to keep data loaded in L1 cache for decryption. */
+ if (currlen > 24 * 1024)
+ currlen = 24 * 1024;
+
+ _gcry_poly1305_update (&c->u_mode.poly1305.ctx, inbuf, currlen);
+
+ c->spec->stdecrypt(&c->context.c, outbuf, (byte*)inbuf, currlen);
+
+ outbuf += currlen;
+ inbuf += currlen;
+ outbuflen -= currlen;
+ inbuflen -= currlen;
+ }
+
+ return 0;
+}
+
+
+static gcry_err_code_t
+_gcry_cipher_poly1305_tag (gcry_cipher_hd_t c,
+ byte * outbuf, size_t outbuflen, int check)
+{
+ gcry_err_code_t err;
+
+ if (outbuflen < POLY1305_TAGLEN)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ if (c->u_mode.poly1305.bytecount_over_limits)
+ return GPG_ERR_INV_LENGTH;
+
+ if (!c->marks.iv)
+ {
+ err = poly1305_set_zeroiv(c);
+ if (err)
+ return err;
+ }
+
+ if (!c->u_mode.poly1305.aad_finalized)
+ poly1305_aad_finish(c);
+
+ if (!c->marks.tag)
+ {
+ /* After data, feed padding bytes so we get 16 byte alignment. */
+ poly1305_do_padding (c, c->u_mode.poly1305.datacount);
+
+ /* Write byte counts to poly1305. */
+ poly1305_fill_bytecounts(c);
+
+ _gcry_poly1305_finish(&c->u_mode.poly1305.ctx, c->u_iv.iv);
+
+ c->marks.tag = 1;
+ }
+
+ if (!check)
+ {
+ memcpy (outbuf, c->u_iv.iv, POLY1305_TAGLEN);
+ }
+ else
+ {
+ /* OUTBUFLEN gives the length of the user supplied tag in OUTBUF
+ * and thus we need to compare its length first. */
+ if (outbuflen != POLY1305_TAGLEN
+ || !buf_eq_const (outbuf, c->u_iv.iv, POLY1305_TAGLEN))
+ return GPG_ERR_CHECKSUM;
+ }
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_poly1305_get_tag (gcry_cipher_hd_t c, unsigned char *outtag,
+ size_t taglen)
+{
+ return _gcry_cipher_poly1305_tag (c, outtag, taglen, 0);
+}
+
+gcry_err_code_t
+_gcry_cipher_poly1305_check_tag (gcry_cipher_hd_t c, const unsigned char *intag,
+ size_t taglen)
+{
+ return _gcry_cipher_poly1305_tag (c, (unsigned char *) intag, taglen, 1);
+}
+
+
+void
+_gcry_cipher_poly1305_setkey (gcry_cipher_hd_t c)
+{
+ c->u_mode.poly1305.aadcount[0] = 0;
+ c->u_mode.poly1305.aadcount[1] = 0;
+
+ c->u_mode.poly1305.datacount[0] = 0;
+ c->u_mode.poly1305.datacount[1] = 0;
+
+ c->u_mode.poly1305.bytecount_over_limits = 0;
+ c->u_mode.poly1305.aad_finalized = 0;
+ c->marks.tag = 0;
+ c->marks.iv = 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_poly1305_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen)
+{
+ byte tmpbuf[64]; /* size of ChaCha20 block */
+ gcry_err_code_t err;
+
+ /* IV must be 96 bits. */
+ if (!iv && ivlen != (96 / 8))
+ return GPG_ERR_INV_ARG;
+
+ memset(&c->u_mode.poly1305.ctx, 0, sizeof(c->u_mode.poly1305.ctx));
+
+ c->u_mode.poly1305.aadcount[0] = 0;
+ c->u_mode.poly1305.aadcount[1] = 0;
+
+ c->u_mode.poly1305.datacount[0] = 0;
+ c->u_mode.poly1305.datacount[1] = 0;
+
+ c->u_mode.poly1305.bytecount_over_limits = 0;
+ c->u_mode.poly1305.aad_finalized = 0;
+ c->marks.tag = 0;
+ c->marks.iv = 0;
+
+ /* Set up IV for stream cipher. */
+ c->spec->setiv (&c->context.c, iv, ivlen);
+
+ /* Get the first block from ChaCha20. */
+ memset(tmpbuf, 0, sizeof(tmpbuf));
+ c->spec->stencrypt(&c->context.c, tmpbuf, tmpbuf, sizeof(tmpbuf));
+
+ /* Use the first 32 bytes as the Poly1305 key. */
+ err = _gcry_poly1305_init (&c->u_mode.poly1305.ctx, tmpbuf, POLY1305_KEYLEN);
+
+ wipememory(tmpbuf, sizeof(tmpbuf));
+
+ if (err)
+ return err;
+
+ c->marks.iv = 1;
+ return 0;
+}
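
Only ChaCha20 is accepted for this mode (enforced in _gcry_cipher_open_internal further below), so this file implements the RFC 8439 ChaCha20-Poly1305 AEAD: _gcry_cipher_poly1305_setiv above consumes one ChaCha20 keystream block and takes its first 32 bytes as the one-time Poly1305 key. A decrypt-and-verify sketch over the public API (illustrative; chacha20_poly1305_open_example is a hypothetical name, error paths abbreviated):

    #include <gcrypt.h>

    static gcry_error_t
    chacha20_poly1305_open_example (const unsigned char key[32],
                                    const unsigned char nonce[12],
                                    const unsigned char *aad, size_t aadlen,
                                    unsigned char *buf, size_t buflen, /* in place */
                                    const unsigned char tag[16])
    {
      gcry_cipher_hd_t hd;
      gcry_error_t err;

      err = gcry_cipher_open (&hd, GCRY_CIPHER_CHACHA20,
                              GCRY_CIPHER_MODE_POLY1305, 0);
      if (err)
        return err;

      if (!(err = gcry_cipher_setkey (hd, key, 32))
          && !(err = gcry_cipher_setiv (hd, nonce, 12)) /* derives the Poly1305 key */
          && !(err = gcry_cipher_authenticate (hd, aad, aadlen))
          && !(err = gcry_cipher_decrypt (hd, buf, buflen, NULL, 0)))
        err = gcry_cipher_checktag (hd, tag, 16);  /* GPG_ERR_CHECKSUM on mismatch */

      gcry_cipher_close (hd);
      return err;
    }

Note that the AAD has to be supplied before the data here; once data processing has started, _gcry_cipher_poly1305_authenticate above rejects further AAD with GPG_ERR_INV_STATE.
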
diff --git a/comm/third_party/libgcrypt/cipher/cipher-selftest.c b/comm/third_party/libgcrypt/cipher/cipher-selftest.c
new file mode 100644
index 0000000000..d7f38a4261
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-selftest.c
@@ -0,0 +1,512 @@
+/* cipher-selftest.c - Helper functions for bulk encryption selftests.
+ * Copyright (C) 2013,2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#ifdef HAVE_SYSLOG
+# include <syslog.h>
+#endif /*HAVE_SYSLOG*/
+
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "cipher-internal.h"
+
+#ifdef HAVE_STDINT_H
+# include <stdint.h> /* uintptr_t */
+#elif defined(HAVE_INTTYPES_H)
+# include <inttypes.h>
+#else
+/* In this case, uintptr_t is provided by config.h. */
+#endif
+
+/* Helper macro to force alignment to 16 bytes. */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_16 __attribute__ ((aligned (16)))
+#else
+# define ATTR_ALIGNED_16
+#endif
+
+
+/* Return an allocated buffer of size CONTEXT_SIZE with an alignment
+ of 16. The caller must free that buffer using the address returned
+ at R_MEM. Returns NULL and sets ERRNO on failure. */
+void *
+_gcry_cipher_selftest_alloc_ctx (const int context_size, unsigned char **r_mem)
+{
+ int offs;
+ unsigned int ctx_aligned_size, memsize;
+
+ ctx_aligned_size = context_size + 15;
+ ctx_aligned_size -= ctx_aligned_size & 0xf;
+
+ memsize = ctx_aligned_size + 16;
+
+ *r_mem = xtrycalloc (1, memsize);
+ if (!*r_mem)
+ return NULL;
+
+ offs = (16 - ((uintptr_t)*r_mem & 15)) & 15;
+ return (void*)(*r_mem + offs);
+}
+
+
+/* Run the self-tests for <block cipher>-CBC-<block size>, testing bulk CBC
+ decryption. Returns NULL on success. */
+const char *
+_gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey_func,
+ gcry_cipher_encrypt_t encrypt_one,
+ const int nblocks, const int blocksize,
+ const int context_size)
+{
+ cipher_bulk_ops_t bulk_ops = { 0, };
+ int i, offs;
+ unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *iv, *iv2, *mem;
+ unsigned int ctx_aligned_size, memsize;
+
+ static const unsigned char key[16] ATTR_ALIGNED_16 = {
+ 0x66,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
+ 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x22
+ };
+
+ /* Allocate buffers, aligning the first two elements to 16 bytes and the
+ rest to the block size. */
+ ctx_aligned_size = context_size + 15;
+ ctx_aligned_size -= ctx_aligned_size & 0xf;
+
+ memsize = ctx_aligned_size + (blocksize * 2) + (blocksize * nblocks * 3) + 16;
+
+ mem = xtrycalloc (1, memsize);
+ if (!mem)
+ return "failed to allocate memory";
+
+ offs = (16 - ((uintptr_t)mem & 15)) & 15;
+ ctx = (void*)(mem + offs);
+ iv = ctx + ctx_aligned_size;
+ iv2 = iv + blocksize;
+ plaintext = iv2 + blocksize;
+ plaintext2 = plaintext + nblocks * blocksize;
+ ciphertext = plaintext2 + nblocks * blocksize;
+
+ /* Initialize ctx */
+ if (setkey_func (ctx, key, sizeof(key), &bulk_ops) != GPG_ERR_NO_ERROR)
+ {
+ xfree(mem);
+ return "setkey failed";
+ }
+
+ /* Test single block code path */
+ memset (iv, 0x4e, blocksize);
+ memset (iv2, 0x4e, blocksize);
+ for (i = 0; i < blocksize; i++)
+ plaintext[i] = i;
+
+ /* CBC manually. */
+ buf_xor (ciphertext, iv, plaintext, blocksize);
+ encrypt_one (ctx, ciphertext, ciphertext);
+ memcpy (iv, ciphertext, blocksize);
+
+ /* CBC decrypt. */
+ bulk_ops.cbc_dec (ctx, iv2, plaintext2, ciphertext, 1);
+ if (memcmp (plaintext2, plaintext, blocksize))
+ {
+ xfree (mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CBC-%d test failed (plaintext mismatch)", cipher,
+ blocksize * 8);
+#else
+ (void)cipher; /* Not used. */
+#endif
+ return "selftest for CBC failed - see syslog for details";
+ }
+
+ if (memcmp (iv2, iv, blocksize))
+ {
+ xfree (mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CBC-%d test failed (IV mismatch)", cipher, blocksize * 8);
+#endif
+ return "selftest for CBC failed - see syslog for details";
+ }
+
+ /* Test parallelized code paths */
+ memset (iv, 0x5f, blocksize);
+ memset (iv2, 0x5f, blocksize);
+
+ for (i = 0; i < nblocks * blocksize; i++)
+ plaintext[i] = i;
+
+ /* Create CBC ciphertext manually. */
+ for (i = 0; i < nblocks * blocksize; i+=blocksize)
+ {
+ buf_xor (&ciphertext[i], iv, &plaintext[i], blocksize);
+ encrypt_one (ctx, &ciphertext[i], &ciphertext[i]);
+ memcpy (iv, &ciphertext[i], blocksize);
+ }
+
+ /* Decrypt using bulk CBC and compare result. */
+ bulk_ops.cbc_dec (ctx, iv2, plaintext2, ciphertext, nblocks);
+
+ if (memcmp (plaintext2, plaintext, nblocks * blocksize))
+ {
+ xfree (mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CBC-%d test failed (plaintext mismatch, parallel path)",
+ cipher, blocksize * 8);
+#endif
+ return "selftest for CBC failed - see syslog for details";
+ }
+ if (memcmp (iv2, iv, blocksize))
+ {
+ xfree (mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CBC-%d test failed (IV mismatch, parallel path)",
+ cipher, blocksize * 8);
+#endif
+ return "selftest for CBC failed - see syslog for details";
+ }
+
+ xfree (mem);
+ return NULL;
+}
+
+/* Run the self-tests for <block cipher>-CFB-<block size>, testing bulk CFB
+ decryption. Returns NULL on success. */
+const char *
+_gcry_selftest_helper_cfb (const char *cipher, gcry_cipher_setkey_t setkey_func,
+ gcry_cipher_encrypt_t encrypt_one,
+ const int nblocks, const int blocksize,
+ const int context_size)
+{
+ cipher_bulk_ops_t bulk_ops = { 0, };
+ int i, offs;
+ unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *iv, *iv2, *mem;
+ unsigned int ctx_aligned_size, memsize;
+
+ static const unsigned char key[16] ATTR_ALIGNED_16 = {
+ 0x11,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
+ 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x33
+ };
+
+ /* Allocate buffers, aligning the first two elements to 16 bytes and the
+ rest to the block size. */
+ ctx_aligned_size = context_size + 15;
+ ctx_aligned_size -= ctx_aligned_size & 0xf;
+
+ memsize = ctx_aligned_size + (blocksize * 2) + (blocksize * nblocks * 3) + 16;
+
+ mem = xtrycalloc (1, memsize);
+ if (!mem)
+ return "failed to allocate memory";
+
+ offs = (16 - ((uintptr_t)mem & 15)) & 15;
+ ctx = (void*)(mem + offs);
+ iv = ctx + ctx_aligned_size;
+ iv2 = iv + blocksize;
+ plaintext = iv2 + blocksize;
+ plaintext2 = plaintext + nblocks * blocksize;
+ ciphertext = plaintext2 + nblocks * blocksize;
+
+ /* Initialize ctx */
+ if (setkey_func (ctx, key, sizeof(key), &bulk_ops) != GPG_ERR_NO_ERROR)
+ {
+ xfree(mem);
+ return "setkey failed";
+ }
+
+ /* Test single block code path */
+ memset(iv, 0xd3, blocksize);
+ memset(iv2, 0xd3, blocksize);
+ for (i = 0; i < blocksize; i++)
+ plaintext[i] = i;
+
+ /* CFB manually. */
+ encrypt_one (ctx, ciphertext, iv);
+ buf_xor_2dst (iv, ciphertext, plaintext, blocksize);
+
+ /* CFB decrypt. */
+ bulk_ops.cfb_dec (ctx, iv2, plaintext2, ciphertext, 1);
+ if (memcmp(plaintext2, plaintext, blocksize))
+ {
+ xfree(mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CFB-%d test failed (plaintext mismatch)", cipher,
+ blocksize * 8);
+#else
+ (void)cipher; /* Not used. */
+#endif
+ return "selftest for CFB failed - see syslog for details";
+ }
+
+ if (memcmp(iv2, iv, blocksize))
+ {
+ xfree(mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CFB-%d test failed (IV mismatch)", cipher, blocksize * 8);
+#endif
+ return "selftest for CFB failed - see syslog for details";
+ }
+
+ /* Test parallelized code paths */
+ memset(iv, 0xe6, blocksize);
+ memset(iv2, 0xe6, blocksize);
+
+ for (i = 0; i < nblocks * blocksize; i++)
+ plaintext[i] = i;
+
+ /* Create CFB ciphertext manually. */
+ for (i = 0; i < nblocks * blocksize; i+=blocksize)
+ {
+ encrypt_one (ctx, &ciphertext[i], iv);
+ buf_xor_2dst (iv, &ciphertext[i], &plaintext[i], blocksize);
+ }
+
+ /* Decrypt using bulk CFB and compare result. */
+ bulk_ops.cfb_dec (ctx, iv2, plaintext2, ciphertext, nblocks);
+
+ if (memcmp(plaintext2, plaintext, nblocks * blocksize))
+ {
+ xfree(mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CFB-%d test failed (plaintext mismatch, parallel path)",
+ cipher, blocksize * 8);
+#endif
+ return "selftest for CFB failed - see syslog for details";
+ }
+ if (memcmp(iv2, iv, blocksize))
+ {
+ xfree(mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CFB-%d test failed (IV mismatch, parallel path)", cipher,
+ blocksize * 8);
+#endif
+ return "selftest for CFB failed - see syslog for details";
+ }
+
+ xfree(mem);
+ return NULL;
+}
+
+/* Run the self-tests for <block cipher>-CTR-<block size>, testing the IV
+ increment of bulk CTR encryption. Returns NULL on success. */
+const char *
+_gcry_selftest_helper_ctr (const char *cipher, gcry_cipher_setkey_t setkey_func,
+ gcry_cipher_encrypt_t encrypt_one,
+ const int nblocks, const int blocksize,
+ const int context_size)
+{
+ cipher_bulk_ops_t bulk_ops = { 0, };
+ int i, j, offs, diff;
+ unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *ciphertext2,
+ *iv, *iv2, *mem;
+ unsigned int ctx_aligned_size, memsize;
+
+ static const unsigned char key[16] ATTR_ALIGNED_16 = {
+ 0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
+ 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21
+ };
+
+ /* Allocate buffers, aligning the first two elements to 16 bytes and the
+ rest to the block size. */
+ ctx_aligned_size = context_size + 15;
+ ctx_aligned_size -= ctx_aligned_size & 0xf;
+
+ memsize = ctx_aligned_size + (blocksize * 2) + (blocksize * nblocks * 4) + 16;
+
+ mem = xtrycalloc (1, memsize);
+ if (!mem)
+ return "failed to allocate memory";
+
+ offs = (16 - ((uintptr_t)mem & 15)) & 15;
+ ctx = (void*)(mem + offs);
+ iv = ctx + ctx_aligned_size;
+ iv2 = iv + blocksize;
+ plaintext = iv2 + blocksize;
+ plaintext2 = plaintext + nblocks * blocksize;
+ ciphertext = plaintext2 + nblocks * blocksize;
+ ciphertext2 = ciphertext + nblocks * blocksize;
+
+ /* Initialize ctx */
+ if (setkey_func (ctx, key, sizeof(key), &bulk_ops) != GPG_ERR_NO_ERROR)
+ {
+ xfree(mem);
+ return "setkey failed";
+ }
+
+ /* Test single block code path */
+ memset (iv, 0xff, blocksize);
+ for (i = 0; i < blocksize; i++)
+ plaintext[i] = i;
+
+ /* CTR manually. */
+ encrypt_one (ctx, ciphertext, iv);
+ for (i = 0; i < blocksize; i++)
+ ciphertext[i] ^= plaintext[i];
+ for (i = blocksize; i > 0; i--)
+ {
+ iv[i-1]++;
+ if (iv[i-1])
+ break;
+ }
+
+ memset (iv2, 0xff, blocksize);
+ bulk_ops.ctr_enc (ctx, iv2, plaintext2, ciphertext, 1);
+
+ if (memcmp (plaintext2, plaintext, blocksize))
+ {
+ xfree (mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CTR-%d test failed (plaintext mismatch)", cipher,
+ blocksize * 8);
+#else
+ (void)cipher; /* Not used. */
+#endif
+ return "selftest for CTR failed - see syslog for details";
+ }
+
+ if (memcmp (iv2, iv, blocksize))
+ {
+ xfree (mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CTR-%d test failed (IV mismatch)", cipher,
+ blocksize * 8);
+#endif
+ return "selftest for CTR failed - see syslog for details";
+ }
+
+ /* Test bulk encryption with typical IV. */
+ memset(iv, 0x57, blocksize-4);
+ iv[blocksize-1] = 1;
+ iv[blocksize-2] = 0;
+ iv[blocksize-3] = 0;
+ iv[blocksize-4] = 0;
+ memset(iv2, 0x57, blocksize-4);
+ iv2[blocksize-1] = 1;
+ iv2[blocksize-2] = 0;
+ iv2[blocksize-3] = 0;
+ iv2[blocksize-4] = 0;
+
+ for (i = 0; i < blocksize * nblocks; i++)
+ plaintext2[i] = plaintext[i] = i;
+
+ /* Create CTR ciphertext manually. */
+ for (i = 0; i < blocksize * nblocks; i+=blocksize)
+ {
+ encrypt_one (ctx, &ciphertext[i], iv);
+ for (j = 0; j < blocksize; j++)
+ ciphertext[i+j] ^= plaintext[i+j];
+ for (j = blocksize; j > 0; j--)
+ {
+ iv[j-1]++;
+ if (iv[j-1])
+ break;
+ }
+ }
+
+ bulk_ops.ctr_enc (ctx, iv2, ciphertext2, plaintext2, nblocks);
+
+ if (memcmp (ciphertext2, ciphertext, blocksize * nblocks))
+ {
+ xfree (mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CTR-%d test failed (ciphertext mismatch, bulk)", cipher,
+ blocksize * 8);
+#endif
+ return "selftest for CTR failed - see syslog for details";
+ }
+ if (memcmp(iv2, iv, blocksize))
+ {
+ xfree (mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CTR-%d test failed (IV mismatch, bulk)", cipher,
+ blocksize * 8);
+#endif
+ return "selftest for CTR failed - see syslog for details";
+ }
+
+ /* Test parallelized code paths (check counter overflow handling) */
+ for (diff = 0; diff < nblocks; diff++) {
+ memset(iv, 0xff, blocksize);
+ iv[blocksize-1] -= diff;
+ iv[0] = iv[1] = 0;
+ iv[2] = 0x07;
+
+ for (i = 0; i < blocksize * nblocks; i++)
+ plaintext[i] = i;
+
+ /* Create CTR ciphertext manually. */
+ for (i = 0; i < blocksize * nblocks; i+=blocksize)
+ {
+ encrypt_one (ctx, &ciphertext[i], iv);
+ for (j = 0; j < blocksize; j++)
+ ciphertext[i+j] ^= plaintext[i+j];
+ for (j = blocksize; j > 0; j--)
+ {
+ iv[j-1]++;
+ if (iv[j-1])
+ break;
+ }
+ }
+
+ /* Decrypt using bulk CTR and compare result. */
+ memset(iv2, 0xff, blocksize);
+ iv2[blocksize-1] -= diff;
+ iv2[0] = iv2[1] = 0;
+ iv2[2] = 0x07;
+
+ bulk_ops.ctr_enc (ctx, iv2, plaintext2, ciphertext, nblocks);
+
+ if (memcmp (plaintext2, plaintext, blocksize * nblocks))
+ {
+ xfree (mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CTR-%d test failed (plaintext mismatch, diff: %d)", cipher,
+ blocksize * 8, diff);
+#endif
+ return "selftest for CTR failed - see syslog for details";
+ }
+ if (memcmp(iv2, iv, blocksize))
+ {
+ xfree (mem);
+#ifdef HAVE_SYSLOG
+ syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+ "%s-CTR-%d test failed (IV mismatch, diff: %d)", cipher,
+ blocksize * 8, diff);
+#endif
+ return "selftest for CTR failed - see syslog for details";
+ }
+ }
+
+ xfree (mem);
+ return NULL;
+}
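
A cipher module typically calls these helpers from its own selftest hook, passing its setkey and single-block encrypt functions together with the block count its widest bulk path handles. A sketch of such a call site (illustrative; my_cipher_context_t, my_setkey and my_encrypt_block are hypothetical stand-ins for the module's real type and functions):

    #include "cipher-selftest.h"

    static const char *
    selftest_cbc_bulk (void)
    {
      const int nblocks   = 8;   /* widest parallel code path to cover */
      const int blocksize = 16;
      const int ctxsize   = sizeof (my_cipher_context_t); /* hypothetical type */

      /* Returns NULL on success, or a static error string on failure.  */
      return _gcry_selftest_helper_cbc ("MYCIPHER",
                                        my_setkey, my_encrypt_block,
                                        nblocks, blocksize, ctxsize);
    }
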
diff --git a/comm/third_party/libgcrypt/cipher/cipher-selftest.h b/comm/third_party/libgcrypt/cipher/cipher-selftest.h
new file mode 100644
index 0000000000..c3090ad122
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-selftest.h
@@ -0,0 +1,69 @@
+/* cipher-selftest.h - Helper functions for bulk encryption selftests.
+ * Copyright (C) 2013,2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef G10_SELFTEST_HELP_H
+#define G10_SELFTEST_HELP_H
+
+#include <config.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+
+typedef void (*gcry_cipher_bulk_cbc_dec_t)(void *context, unsigned char *iv,
+ void *outbuf_arg,
+ const void *inbuf_arg,
+ size_t nblocks);
+
+typedef void (*gcry_cipher_bulk_cfb_dec_t)(void *context, unsigned char *iv,
+ void *outbuf_arg,
+ const void *inbuf_arg,
+ size_t nblocks);
+
+typedef void (*gcry_cipher_bulk_ctr_enc_t)(void *context, unsigned char *iv,
+ void *outbuf_arg,
+ const void *inbuf_arg,
+ size_t nblocks);
+
+/* Helper function to allocate an aligned context for selftests. */
+void *_gcry_cipher_selftest_alloc_ctx (const int context_size,
+ unsigned char **r_mem);
+
+
+/* Helper function for bulk CBC decryption selftest */
+const char *
+_gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey,
+ gcry_cipher_encrypt_t encrypt_one,
+ const int nblocks, const int blocksize,
+ const int context_size);
+
+/* Helper function for bulk CFB decryption selftest */
+const char *
+_gcry_selftest_helper_cfb (const char *cipher, gcry_cipher_setkey_t setkey,
+ gcry_cipher_encrypt_t encrypt_one,
+ const int nblocks, const int blocksize,
+ const int context_size);
+
+/* Helper function for bulk CTR encryption selftest */
+const char *
+_gcry_selftest_helper_ctr (const char *cipher, gcry_cipher_setkey_t setkey,
+ gcry_cipher_encrypt_t encrypt_one,
+ const int nblocks, const int blocksize,
+ const int context_size);
+
+#endif /*G10_SELFTEST_HELP_H*/
diff --git a/comm/third_party/libgcrypt/cipher/cipher-xts.c b/comm/third_party/libgcrypt/cipher/cipher-xts.c
new file mode 100644
index 0000000000..0522a271a1
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-xts.c
@@ -0,0 +1,189 @@
+/* cipher-xts.c - XTS mode implementation
+ * Copyright (C) 2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+static inline void xts_gfmul_byA (unsigned char *out, const unsigned char *in)
+{
+ u64 hi = buf_get_le64 (in + 8);
+ u64 lo = buf_get_le64 (in + 0);
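+ /* If the top bit of the 128-bit value is set, doubling it wraps and must
+ be reduced by the XTS polynomial x^128 + x^7 + x^2 + x + 1, whose low
+ byte is 0x87. */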
+ u64 carry = -(hi >> 63) & 0x87;
+
+ hi = (hi << 1) + (lo >> 63);
+ lo = (lo << 1) ^ carry;
+
+ buf_put_le64 (out + 8, hi);
+ buf_put_le64 (out + 0, lo);
+}
+
+
+static inline void xts_inc128 (unsigned char *seqno)
+{
+ u64 lo = buf_get_le64 (seqno + 0);
+ u64 hi = buf_get_le64 (seqno + 8);
+
+ hi += !(++lo);
+
+ buf_put_le64 (seqno + 0, lo);
+ buf_put_le64 (seqno + 8, hi);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_xts_crypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen,
+ int encrypt)
+{
+ gcry_cipher_encrypt_t tweak_fn = c->spec->encrypt;
+ gcry_cipher_encrypt_t crypt_fn =
+ encrypt ? c->spec->encrypt : c->spec->decrypt;
+ union
+ {
+ cipher_context_alignment_t xcx;
+ byte x1[GCRY_XTS_BLOCK_LEN];
+ u64 x64[GCRY_XTS_BLOCK_LEN / sizeof(u64)];
+ } tmp;
+ unsigned int burn, nburn;
+ size_t nblocks;
+
+ if (c->spec->blocksize != GCRY_XTS_BLOCK_LEN)
+ return GPG_ERR_CIPHER_ALGO;
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ if (inbuflen < GCRY_XTS_BLOCK_LEN)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+
+ /* Data-unit max length: 2^20 blocks. */
+ if (inbuflen > GCRY_XTS_BLOCK_LEN << 20)
+ return GPG_ERR_INV_LENGTH;
+
+ nblocks = inbuflen / GCRY_XTS_BLOCK_LEN;
+ nblocks -= !encrypt && (inbuflen % GCRY_XTS_BLOCK_LEN) != 0;
+
+ /* Generate first tweak value. */
+ burn = tweak_fn (c->u_mode.xts.tweak_context, c->u_ctr.ctr, c->u_iv.iv);
+
+ /* Use a bulk method if available. */
+ if (nblocks && c->bulk.xts_crypt)
+ {
+ c->bulk.xts_crypt (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks,
+ encrypt);
+ inbuf += nblocks * GCRY_XTS_BLOCK_LEN;
+ outbuf += nblocks * GCRY_XTS_BLOCK_LEN;
+ inbuflen -= nblocks * GCRY_XTS_BLOCK_LEN;
+ nblocks = 0;
+ }
+
+ /* If we don't have a bulk method, use the standard method. We also
+ use this method for a remaining partial block. */
+
+ while (nblocks)
+ {
+ /* Xor-Encrypt/Decrypt-Xor block. */
+ cipher_block_xor (tmp.x64, inbuf, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+ nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1);
+ burn = nburn > burn ? nburn : burn;
+ cipher_block_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+
+ outbuf += GCRY_XTS_BLOCK_LEN;
+ inbuf += GCRY_XTS_BLOCK_LEN;
+ inbuflen -= GCRY_XTS_BLOCK_LEN;
+ nblocks--;
+
+ /* Generate next tweak. */
+ xts_gfmul_byA (c->u_ctr.ctr, c->u_ctr.ctr);
+ }
+
+ /* Handle remaining data with ciphertext stealing. */
+ if (inbuflen)
+ {
+ if (!encrypt)
+ {
+ gcry_assert (inbuflen > GCRY_XTS_BLOCK_LEN);
+ gcry_assert (inbuflen < GCRY_XTS_BLOCK_LEN * 2);
+
+ /* Generate last tweak. */
+ xts_gfmul_byA (tmp.x1, c->u_ctr.ctr);
+
+ /* Decrypt last block first. */
+ cipher_block_xor (outbuf, inbuf, tmp.x64, GCRY_XTS_BLOCK_LEN);
+ nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+ burn = nburn > burn ? nburn : burn;
+ cipher_block_xor (outbuf, outbuf, tmp.x64, GCRY_XTS_BLOCK_LEN);
+
+ inbuflen -= GCRY_XTS_BLOCK_LEN;
+ inbuf += GCRY_XTS_BLOCK_LEN;
+ outbuf += GCRY_XTS_BLOCK_LEN;
+ }
+
+ gcry_assert (inbuflen < GCRY_XTS_BLOCK_LEN);
+ outbuf -= GCRY_XTS_BLOCK_LEN;
+
+ /* Steal ciphertext from previous block. */
+ cipher_block_cpy (tmp.x64, outbuf, GCRY_XTS_BLOCK_LEN);
+ buf_cpy (tmp.x64, inbuf, inbuflen);
+ buf_cpy (outbuf + GCRY_XTS_BLOCK_LEN, outbuf, inbuflen);
+
+ /* Decrypt/Encrypt last block. */
+ cipher_block_xor (tmp.x64, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+ nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1);
+ burn = nburn > burn ? nburn : burn;
+ cipher_block_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+ }
+
+ /* Auto-increment data-unit sequence number */
+ xts_inc128 (c->u_iv.iv);
+
+ wipememory (&tmp, sizeof(tmp));
+ wipememory (c->u_ctr.ctr, sizeof(c->u_ctr.ctr));
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_xts_encrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ return _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 1);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_xts_decrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ return _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 0);
+}
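
Seen from the caller, the XTS tweak key is simply appended to the data key, and the 16-byte IV carries the data-unit (sector) number that xts_inc128 above auto-increments after each call. A sector-encryption sketch with AES-256-XTS (illustrative; xts_encrypt_sector_example is a hypothetical name, sector_len must be at least one 16-byte block, error handling abbreviated):

    #include <gcrypt.h>

    static gcry_error_t
    xts_encrypt_sector_example (const unsigned char key[64], /* data key || tweak key */
                                unsigned long long sector_no,
                                unsigned char *sector, size_t sector_len)
    {
      unsigned char tweak[16] = { 0 };
      gcry_cipher_hd_t hd;
      gcry_error_t err;
      int i;

      /* Little-endian data-unit number, matching xts_inc128 above.  */
      for (i = 0; i < 8; i++)
        tweak[i] = (sector_no >> (8 * i)) & 0xff;

      err = gcry_cipher_open (&hd, GCRY_CIPHER_AES256, GCRY_CIPHER_MODE_XTS, 0);
      if (err)
        return err;

      if (!(err = gcry_cipher_setkey (hd, key, 64))
          && !(err = gcry_cipher_setiv (hd, tweak, sizeof tweak)))
        err = gcry_cipher_encrypt (hd, sector, sector_len, NULL, 0);

      gcry_cipher_close (hd);
      return err;
    }
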
diff --git a/comm/third_party/libgcrypt/cipher/cipher.c b/comm/third_party/libgcrypt/cipher/cipher.c
new file mode 100644
index 0000000000..1039dff728
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher.c
@@ -0,0 +1,1767 @@
+/* cipher.c - cipher dispatcher
+ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
+ * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "../src/gcrypt-testapi.h"
+#include "cipher.h"
+#include "./cipher-internal.h"
+
+
+/* This is the list of the default ciphers, which are included in
+ libgcrypt. */
+static gcry_cipher_spec_t * const cipher_list[] =
+ {
+#if USE_BLOWFISH
+ &_gcry_cipher_spec_blowfish,
+#endif
+#if USE_DES
+ &_gcry_cipher_spec_des,
+ &_gcry_cipher_spec_tripledes,
+#endif
+#if USE_ARCFOUR
+ &_gcry_cipher_spec_arcfour,
+#endif
+#if USE_CAST5
+ &_gcry_cipher_spec_cast5,
+#endif
+#if USE_AES
+ &_gcry_cipher_spec_aes,
+ &_gcry_cipher_spec_aes192,
+ &_gcry_cipher_spec_aes256,
+#endif
+#if USE_TWOFISH
+ &_gcry_cipher_spec_twofish,
+ &_gcry_cipher_spec_twofish128,
+#endif
+#if USE_SERPENT
+ &_gcry_cipher_spec_serpent128,
+ &_gcry_cipher_spec_serpent192,
+ &_gcry_cipher_spec_serpent256,
+#endif
+#if USE_RFC2268
+ &_gcry_cipher_spec_rfc2268_40,
+ &_gcry_cipher_spec_rfc2268_128,
+#endif
+#if USE_SEED
+ &_gcry_cipher_spec_seed,
+#endif
+#if USE_CAMELLIA
+ &_gcry_cipher_spec_camellia128,
+ &_gcry_cipher_spec_camellia192,
+ &_gcry_cipher_spec_camellia256,
+#endif
+#ifdef USE_IDEA
+ &_gcry_cipher_spec_idea,
+#endif
+#if USE_SALSA20
+ &_gcry_cipher_spec_salsa20,
+ &_gcry_cipher_spec_salsa20r12,
+#endif
+#if USE_GOST28147
+ &_gcry_cipher_spec_gost28147,
+ &_gcry_cipher_spec_gost28147_mesh,
+#endif
+#if USE_CHACHA20
+ &_gcry_cipher_spec_chacha20,
+#endif
+#if USE_SM4
+ &_gcry_cipher_spec_sm4,
+#endif
+ NULL
+ };
+
+/* Cipher implementations starting with index 0 (enum gcry_cipher_algos) */
+static gcry_cipher_spec_t * const cipher_list_algo0[] =
+ {
+ NULL, /* GCRY_CIPHER_NONE */
+#ifdef USE_IDEA
+ &_gcry_cipher_spec_idea,
+#else
+ NULL,
+#endif
+#if USE_DES
+ &_gcry_cipher_spec_tripledes,
+#else
+ NULL,
+#endif
+#if USE_CAST5
+ &_gcry_cipher_spec_cast5,
+#else
+ NULL,
+#endif
+#if USE_BLOWFISH
+ &_gcry_cipher_spec_blowfish,
+#else
+ NULL,
+#endif
+ NULL, /* GCRY_CIPHER_SAFER_SK128 */
+ NULL, /* GCRY_CIPHER_DES_SK */
+#if USE_AES
+ &_gcry_cipher_spec_aes,
+ &_gcry_cipher_spec_aes192,
+ &_gcry_cipher_spec_aes256,
+#else
+ NULL,
+ NULL,
+ NULL,
+#endif
+#if USE_TWOFISH
+ &_gcry_cipher_spec_twofish
+#else
+ NULL
+#endif
+ };
+
+/* Cipher implementations starting with index 301 (enum gcry_cipher_algos) */
+static gcry_cipher_spec_t * const cipher_list_algo301[] =
+ {
+#if USE_ARCFOUR
+ &_gcry_cipher_spec_arcfour,
+#else
+ NULL,
+#endif
+#if USE_DES
+ &_gcry_cipher_spec_des,
+#else
+ NULL,
+#endif
+#if USE_TWOFISH
+ &_gcry_cipher_spec_twofish128,
+#else
+ NULL,
+#endif
+#if USE_SERPENT
+ &_gcry_cipher_spec_serpent128,
+ &_gcry_cipher_spec_serpent192,
+ &_gcry_cipher_spec_serpent256,
+#else
+ NULL,
+ NULL,
+ NULL,
+#endif
+#if USE_RFC2268
+ &_gcry_cipher_spec_rfc2268_40,
+ &_gcry_cipher_spec_rfc2268_128,
+#else
+ NULL,
+ NULL,
+#endif
+#if USE_SEED
+ &_gcry_cipher_spec_seed,
+#else
+ NULL,
+#endif
+#if USE_CAMELLIA
+ &_gcry_cipher_spec_camellia128,
+ &_gcry_cipher_spec_camellia192,
+ &_gcry_cipher_spec_camellia256,
+#else
+ NULL,
+ NULL,
+ NULL,
+#endif
+#if USE_SALSA20
+ &_gcry_cipher_spec_salsa20,
+ &_gcry_cipher_spec_salsa20r12,
+#else
+ NULL,
+ NULL,
+#endif
+#if USE_GOST28147
+ &_gcry_cipher_spec_gost28147,
+#else
+ NULL,
+#endif
+#if USE_CHACHA20
+ &_gcry_cipher_spec_chacha20,
+#else
+ NULL,
+#endif
+#if USE_GOST28147
+ &_gcry_cipher_spec_gost28147_mesh,
+#else
+ NULL,
+#endif
+#if USE_SM4
+ &_gcry_cipher_spec_sm4,
+#else
+ NULL,
+#endif
+ };
+
+
+static void _gcry_cipher_setup_mode_ops(gcry_cipher_hd_t c, int mode);
+
+
+static int
+map_algo (int algo)
+{
+ return algo;
+}
+
+
+/* Return the spec structure for the cipher algorithm ALGO. For
+ an unknown algorithm NULL is returned. */
+static gcry_cipher_spec_t *
+spec_from_algo (int algo)
+{
+ gcry_cipher_spec_t *spec = NULL;
+
+ algo = map_algo (algo);
+
+ if (algo >= 0 && algo < DIM(cipher_list_algo0))
+ spec = cipher_list_algo0[algo];
+ else if (algo >= 301 && algo < 301 + DIM(cipher_list_algo301))
+ spec = cipher_list_algo301[algo - 301];
+
+ if (spec)
+ gcry_assert (spec->algo == algo);
+
+ return spec;
+}
+
+
+/* Lookup a cipher's spec by its name. */
+static gcry_cipher_spec_t *
+spec_from_name (const char *name)
+{
+ gcry_cipher_spec_t *spec;
+ int idx;
+ const char **aliases;
+
+ for (idx=0; (spec = cipher_list[idx]); idx++)
+ {
+ if (!stricmp (name, spec->name))
+ return spec;
+ if (spec->aliases)
+ {
+ for (aliases = spec->aliases; *aliases; aliases++)
+ if (!stricmp (name, *aliases))
+ return spec;
+ }
+ }
+
+ return NULL;
+}
+
+
+/* Lookup a cipher's spec by its OID. */
+static gcry_cipher_spec_t *
+spec_from_oid (const char *oid)
+{
+ gcry_cipher_spec_t *spec;
+ gcry_cipher_oid_spec_t *oid_specs;
+ int idx, j;
+
+ for (idx=0; (spec = cipher_list[idx]); idx++)
+ {
+ oid_specs = spec->oids;
+ if (oid_specs)
+ {
+ for (j = 0; oid_specs[j].oid; j++)
+ if (!stricmp (oid, oid_specs[j].oid))
+ return spec;
+ }
+ }
+
+ return NULL;
+}
+
+
+/* Locate the OID in the oid table and return the spec or NULL if not
+ found. An optional "oid." or "OID." prefix in OID is ignored; the
+ OID is expected to be in standard IETF dotted notation. A pointer
+ to the OID specification of the module implementing this algorithm
+ is returned in OID_SPEC unless that is passed as NULL. */
+static gcry_cipher_spec_t *
+search_oid (const char *oid, gcry_cipher_oid_spec_t *oid_spec)
+{
+ gcry_cipher_spec_t *spec;
+ int i;
+
+ if (!oid)
+ return NULL;
+
+ if (!strncmp (oid, "oid.", 4) || !strncmp (oid, "OID.", 4))
+ oid += 4;
+
+ spec = spec_from_oid (oid);
+ if (spec && spec->oids)
+ {
+ for (i = 0; spec->oids[i].oid; i++)
+ if (!stricmp (oid, spec->oids[i].oid))
+ {
+ if (oid_spec)
+ *oid_spec = spec->oids[i];
+ return spec;
+ }
+ }
+
+ return NULL;
+}
+
+
+/* Map STRING to the cipher algorithm identifier. Returns the
+ algorithm ID of the cipher for the given name or 0 if the name is
+ not known. It is valid to pass NULL for STRING which results in a
+ return value of 0. */
+int
+_gcry_cipher_map_name (const char *string)
+{
+ gcry_cipher_spec_t *spec;
+
+ if (!string)
+ return 0;
+
+ /* If the string starts with a digit (optionally prefixed with
+ either "OID." or "oid."), we first look into our table of ASN.1
+ object identifiers to figure out the algorithm */
+
+ spec = search_oid (string, NULL);
+ if (spec)
+ return spec->algo;
+
+ spec = spec_from_name (string);
+ if (spec)
+ return spec->algo;
+
+ return 0;
+}
+
+
+/* Given a STRING with an OID in dotted decimal notation, this
+ function returns the cipher mode (GCRY_CIPHER_MODE_*) associated
+ with that OID or 0 if no mode is known. Passing NULL for string
+ yields a return value of 0. */
+int
+_gcry_cipher_mode_from_oid (const char *string)
+{
+ gcry_cipher_spec_t *spec;
+ gcry_cipher_oid_spec_t oid_spec;
+
+ if (!string)
+ return 0;
+
+ spec = search_oid (string, &oid_spec);
+ if (spec)
+ return oid_spec.mode;
+
+ return 0;
+}
+
+
+/* Map the cipher algorithm identifier ALGORITHM to a string
+ representing this algorithm. This string is the default name as
+ used by Libgcrypt. A "?" is returned for an unknown algorithm.
+ NULL is never returned. */
+const char *
+_gcry_cipher_algo_name (int algorithm)
+{
+ gcry_cipher_spec_t *spec;
+
+ spec = spec_from_algo (algorithm);
+ return spec? spec->name : "?";
+}
+
+
+/* Flag the cipher algorithm with the identifier ALGORITHM as
+ disabled. There is no error return, the function does nothing for
+ unknown algorithms. Disabled algorithms are virtually not
+ available in Libgcrypt. This is not thread safe and should thus be
+ called early. */
+static void
+disable_cipher_algo (int algo)
+{
+ gcry_cipher_spec_t *spec = spec_from_algo (algo);
+
+ if (spec)
+ spec->flags.disabled = 1;
+}
+
+
+/* Return 0 if the cipher algorithm with identifier ALGORITHM is
+ available. Returns a basic error code value if it is not
+ available. */
+static gcry_err_code_t
+check_cipher_algo (int algorithm)
+{
+ gcry_cipher_spec_t *spec;
+
+ spec = spec_from_algo (algorithm);
+ if (spec && !spec->flags.disabled)
+ return 0;
+
+ return GPG_ERR_CIPHER_ALGO;
+}
+
+
+/* Return the standard length in bits of the key for the cipher
+ algorithm with the identifier ALGORITHM. */
+static unsigned int
+cipher_get_keylen (int algorithm)
+{
+ gcry_cipher_spec_t *spec;
+ unsigned len = 0;
+
+ spec = spec_from_algo (algorithm);
+ if (spec)
+ {
+ len = spec->keylen;
+ if (!len)
+ log_bug ("cipher %d w/o key length\n", algorithm);
+ }
+
+ return len;
+}
+
+
+/* Return the block length of the cipher algorithm with the identifier
+ ALGORITHM. This function returns 0 for an invalid algorithm. */
+static unsigned int
+cipher_get_blocksize (int algorithm)
+{
+ gcry_cipher_spec_t *spec;
+ unsigned len = 0;
+
+ spec = spec_from_algo (algorithm);
+ if (spec)
+ {
+ len = spec->blocksize;
+ if (!len)
+ log_bug ("cipher %d w/o blocksize\n", algorithm);
+ }
+
+ return len;
+}
+
+
+/*
+ Open a cipher handle for use with cipher algorithm ALGORITHM, using
+ the cipher mode MODE (one of the GCRY_CIPHER_MODE_*) and return a
+ handle in HANDLE. Put NULL into HANDLE and return an error code if
+ something goes wrong. FLAGS may be used to modify the
+ operation. The defined flags are:
+
+ GCRY_CIPHER_SECURE: allocate all internal buffers in secure memory.
+ GCRY_CIPHER_ENABLE_SYNC: Enable the sync operation as used in OpenPGP.
+ GCRY_CIPHER_CBC_CTS: Enable CTS mode.
+ GCRY_CIPHER_CBC_MAC: Enable MAC mode.
+
+ Values for these flags may be combined using OR.
+ */
+gcry_err_code_t
+_gcry_cipher_open (gcry_cipher_hd_t *handle,
+ int algo, int mode, unsigned int flags)
+{
+ gcry_err_code_t rc;
+ gcry_cipher_hd_t h = NULL;
+
+ if (mode >= GCRY_CIPHER_MODE_INTERNAL)
+ rc = GPG_ERR_INV_CIPHER_MODE;
+ else
+ rc = _gcry_cipher_open_internal (&h, algo, mode, flags);
+
+ *handle = rc ? NULL : h;
+
+ return rc;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_open_internal (gcry_cipher_hd_t *handle,
+ int algo, int mode, unsigned int flags)
+{
+ int secure = (flags & GCRY_CIPHER_SECURE);
+ gcry_cipher_spec_t *spec;
+ gcry_cipher_hd_t h = NULL;
+ gcry_err_code_t err;
+
+ /* If the application forgot to call the random poll function, we do
+ it here to ensure that it is run once in a while. */
+ _gcry_fast_random_poll ();
+
+ spec = spec_from_algo (algo);
+ if (!spec)
+ err = GPG_ERR_CIPHER_ALGO;
+ else if (spec->flags.disabled)
+ err = GPG_ERR_CIPHER_ALGO;
+ else
+ err = 0;
+
+ /* check flags */
+ if ((! err)
+ && ((flags & ~(0
+ | GCRY_CIPHER_SECURE
+ | GCRY_CIPHER_ENABLE_SYNC
+ | GCRY_CIPHER_CBC_CTS
+ | GCRY_CIPHER_CBC_MAC))
+ || ((flags & GCRY_CIPHER_CBC_CTS) && (flags & GCRY_CIPHER_CBC_MAC))))
+ err = GPG_ERR_CIPHER_ALGO;
+
+ /* check that a valid mode has been requested */
+ if (! err)
+ switch (mode)
+ {
+ case GCRY_CIPHER_MODE_CCM:
+ if (spec->blocksize != GCRY_CCM_BLOCK_LEN)
+ err = GPG_ERR_INV_CIPHER_MODE;
+ if (!spec->encrypt || !spec->decrypt)
+ err = GPG_ERR_INV_CIPHER_MODE;
+ break;
+
+ case GCRY_CIPHER_MODE_XTS:
+ if (spec->blocksize != GCRY_XTS_BLOCK_LEN)
+ err = GPG_ERR_INV_CIPHER_MODE;
+ if (!spec->encrypt || !spec->decrypt)
+ err = GPG_ERR_INV_CIPHER_MODE;
+ break;
+
+ case GCRY_CIPHER_MODE_ECB:
+ case GCRY_CIPHER_MODE_CBC:
+ case GCRY_CIPHER_MODE_CFB:
+ case GCRY_CIPHER_MODE_CFB8:
+ case GCRY_CIPHER_MODE_OFB:
+ case GCRY_CIPHER_MODE_CTR:
+ case GCRY_CIPHER_MODE_AESWRAP:
+ case GCRY_CIPHER_MODE_CMAC:
+ case GCRY_CIPHER_MODE_EAX:
+ case GCRY_CIPHER_MODE_GCM:
+ if (!spec->encrypt || !spec->decrypt)
+ err = GPG_ERR_INV_CIPHER_MODE;
+ break;
+
+ case GCRY_CIPHER_MODE_POLY1305:
+ if (!spec->stencrypt || !spec->stdecrypt || !spec->setiv)
+ err = GPG_ERR_INV_CIPHER_MODE;
+ else if (spec->algo != GCRY_CIPHER_CHACHA20)
+ err = GPG_ERR_INV_CIPHER_MODE;
+ break;
+
+ case GCRY_CIPHER_MODE_OCB:
+ /* Note that our implementation allows only for 128 bit block
+ length algorithms. Lower block lengths would be possible
+ but we do not implement them because they limit the
+ security too much. */
+ if (!spec->encrypt || !spec->decrypt)
+ err = GPG_ERR_INV_CIPHER_MODE;
+ else if (spec->blocksize != (128/8))
+ err = GPG_ERR_INV_CIPHER_MODE;
+ break;
+
+ case GCRY_CIPHER_MODE_STREAM:
+ if (!spec->stencrypt || !spec->stdecrypt)
+ err = GPG_ERR_INV_CIPHER_MODE;
+ break;
+
+ case GCRY_CIPHER_MODE_NONE:
+ /* This mode may be used for debugging. It copies the plaintext
+ verbatim to the ciphertext. We do not allow this in fips mode
+ or if no debug flag has been set. */
+ if (fips_mode () || !_gcry_get_debug_flag (0))
+ err = GPG_ERR_INV_CIPHER_MODE;
+ break;
+
+ default:
+ err = GPG_ERR_INV_CIPHER_MODE;
+ }
+
+ /* Perform selftest here and mark this with a flag in cipher_table?
+ No, we should not do this as it takes too long. Further it does
+ not make sense to exclude algorithms with failing selftests at
+ runtime: If a selftest fails there is something seriously wrong
+ with the system and thus we better die immediately. */
+
+ if (! err)
+ {
+ size_t size = (sizeof (*h)
+ + 2 * spec->contextsize
+ - sizeof (cipher_context_alignment_t)
+#ifdef NEED_16BYTE_ALIGNED_CONTEXT
+ + 15 /* Space for leading alignment gap. */
+#endif /*NEED_16BYTE_ALIGNED_CONTEXT*/
+ );
+
+ /* Space needed per mode. */
+ switch (mode)
+ {
+ case GCRY_CIPHER_MODE_XTS:
+ /* Additional cipher context for tweak. */
+ size += 2 * spec->contextsize + 15;
+ break;
+
+ default:
+ break;
+ }
+
+ if (secure)
+ h = xtrycalloc_secure (1, size);
+ else
+ h = xtrycalloc (1, size);
+
+ if (! h)
+ err = gpg_err_code_from_syserror ();
+ else
+ {
+ size_t off = 0;
+ char *tc;
+
+#ifdef NEED_16BYTE_ALIGNED_CONTEXT
+ if ( ((uintptr_t)h & 0x0f) )
+ {
+ /* The malloced block is not aligned on a 16 byte
+ boundary. Correct for this. */
+ off = 16 - ((uintptr_t)h & 0x0f);
+ h = (void*)((char*)h + off);
+ }
+#endif /*NEED_16BYTE_ALIGNED_CONTEXT*/
+
+ h->magic = secure ? CTX_MAGIC_SECURE : CTX_MAGIC_NORMAL;
+ h->actual_handle_size = size - off;
+ h->handle_offset = off;
+ h->spec = spec;
+ h->algo = algo;
+ h->mode = mode;
+ h->flags = flags;
+
+ /* Setup mode routines. */
+ _gcry_cipher_setup_mode_ops(h, mode);
+
+ /* Setup defaults depending on the mode. */
+ switch (mode)
+ {
+ case GCRY_CIPHER_MODE_OCB:
+ h->u_mode.ocb.taglen = 16; /* Bytes. */
+ break;
+
+ case GCRY_CIPHER_MODE_XTS:
+ tc = h->context.c + spec->contextsize * 2;
+ tc += (16 - (uintptr_t)tc % 16) % 16;
+ h->u_mode.xts.tweak_context = tc;
+
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+
+ /* Done. */
+
+ *handle = err ? NULL : h;
+
+ return err;
+}
+
+
+/* Release all resources associated with the cipher handle H. H may be
+ NULL in which case this is a no-operation. */
+void
+_gcry_cipher_close (gcry_cipher_hd_t h)
+{
+ size_t off;
+
+ if (!h)
+ return;
+
+ if ((h->magic != CTX_MAGIC_SECURE)
+ && (h->magic != CTX_MAGIC_NORMAL))
+ _gcry_fatal_error(GPG_ERR_INTERNAL,
+ "gcry_cipher_close: already closed/invalid handle");
+ else
+ h->magic = 0;
+
+ /* We always want to wipe out the memory even when the context has
+ been allocated in secure memory. The user might have disabled
+ secure memory or is using his own implementation which does not
+ do the wiping. To accomplish this we need to keep track of the
+ actual size of this structure because we have no way to know
+ how large the allocated area was when using a standard malloc. */
+ off = h->handle_offset;
+ wipememory (h, h->actual_handle_size);
+
+ xfree ((char*)h - off);
+}
+
+
+/* Set the key to be used for the encryption context C to KEY with
+ length KEYLEN. The length should match the required length. */
+static gcry_err_code_t
+cipher_setkey (gcry_cipher_hd_t c, byte *key, size_t keylen)
+{
+ gcry_err_code_t rc;
+
+ if (c->mode == GCRY_CIPHER_MODE_XTS)
+ {
+ /* XTS uses two keys. */
+ if (keylen % 2)
+ return GPG_ERR_INV_KEYLEN;
+ keylen /= 2;
+
+ if (fips_mode ())
+ {
+ /* Reject key if subkeys Key_1 and Key_2 are equal.
+ See "Implementation Guidance for FIPS 140-2, A.9 XTS-AES
+ Key Generation Requirements" for details. */
+ if (buf_eq_const (key, key + keylen, keylen))
+ return GPG_ERR_WEAK_KEY;
+ }
+ }
+
+ rc = c->spec->setkey (&c->context.c, key, keylen, &c->bulk);
+ if (!rc || (c->marks.allow_weak_key && rc == GPG_ERR_WEAK_KEY))
+ {
+ /* Duplicate initial context. */
+ memcpy ((void *) ((char *) &c->context.c + c->spec->contextsize),
+ (void *) &c->context.c,
+ c->spec->contextsize);
+ c->marks.key = 1;
+
+ switch (c->mode)
+ {
+ case GCRY_CIPHER_MODE_CMAC:
+ rc = _gcry_cipher_cmac_set_subkeys (c);
+ break;
+
+ case GCRY_CIPHER_MODE_EAX:
+ rc = _gcry_cipher_eax_setkey (c);
+ break;
+
+ case GCRY_CIPHER_MODE_GCM:
+ _gcry_cipher_gcm_setkey (c);
+ break;
+
+ case GCRY_CIPHER_MODE_OCB:
+ _gcry_cipher_ocb_setkey (c);
+ break;
+
+ case GCRY_CIPHER_MODE_POLY1305:
+ _gcry_cipher_poly1305_setkey (c);
+ break;
+
+ case GCRY_CIPHER_MODE_XTS:
+ /* Setup tweak cipher with second part of XTS key. */
+ rc = c->spec->setkey (c->u_mode.xts.tweak_context, key + keylen,
+ keylen, &c->bulk);
+ if (!rc || (c->marks.allow_weak_key && rc == GPG_ERR_WEAK_KEY))
+ {
+ /* Duplicate initial tweak context. */
+ memcpy (c->u_mode.xts.tweak_context + c->spec->contextsize,
+ c->u_mode.xts.tweak_context, c->spec->contextsize);
+ }
+ else
+ c->marks.key = 0;
+ break;
+
+ default:
+ break;
+ };
+ }
+ else
+ c->marks.key = 0;
+
+ return rc;
+}
+
+
+/* Set the IV to be used for the encryption context C to IV with
+ length IVLEN. The length should match the required length. */
+static gcry_err_code_t
+cipher_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen)
+{
+ /* If the cipher has its own IV handler, we use only this one. This
+ is currently used for stream ciphers requiring a nonce. */
+ if (c->spec->setiv)
+ {
+ c->spec->setiv (&c->context.c, iv, ivlen);
+ return 0;
+ }
+
+ memset (c->u_iv.iv, 0, c->spec->blocksize);
+ if (iv)
+ {
+ if (ivlen != c->spec->blocksize)
+ {
+ log_info ("WARNING: cipher_setiv: ivlen=%u blklen=%u\n",
+ (unsigned int)ivlen, (unsigned int)c->spec->blocksize);
+ fips_signal_error ("IV length does not match blocklength");
+ }
+ if (ivlen > c->spec->blocksize)
+ ivlen = c->spec->blocksize;
+ memcpy (c->u_iv.iv, iv, ivlen);
+ c->marks.iv = 1;
+ }
+ else
+ c->marks.iv = 0;
+ c->unused = 0;
+
+ return 0;
+}
+
+
+/* Reset the cipher context to the initial context. This is basically
+ the same as a release followed by a new open. */
+static void
+cipher_reset (gcry_cipher_hd_t c)
+{
+ unsigned int marks_key, marks_allow_weak_key;
+
+ marks_key = c->marks.key;
+ marks_allow_weak_key = c->marks.allow_weak_key;
+
+ memcpy (&c->context.c,
+ (char *) &c->context.c + c->spec->contextsize,
+ c->spec->contextsize);
+ memset (&c->marks, 0, sizeof c->marks);
+ memset (c->u_iv.iv, 0, c->spec->blocksize);
+ memset (c->lastiv, 0, c->spec->blocksize);
+ memset (c->u_ctr.ctr, 0, c->spec->blocksize);
+ c->unused = 0;
+
+ c->marks.key = marks_key;
+ c->marks.allow_weak_key = marks_allow_weak_key;
+
+ switch (c->mode)
+ {
+ case GCRY_CIPHER_MODE_CMAC:
+ _gcry_cmac_reset(&c->u_mode.cmac);
+ break;
+
+ case GCRY_CIPHER_MODE_EAX:
+ _gcry_cmac_reset(&c->u_mode.eax.cmac_header);
+ _gcry_cmac_reset(&c->u_mode.eax.cmac_ciphertext);
+ break;
+
+ case GCRY_CIPHER_MODE_GCM:
+ /* Only clear head of u_mode, keep ghash_key and gcm_table. */
+ {
+ byte *u_mode_pos = (void *)&c->u_mode;
+ byte *ghash_key_pos = c->u_mode.gcm.u_ghash_key.key;
+ size_t u_mode_head_length = ghash_key_pos - u_mode_pos;
+
+ memset (&c->u_mode, 0, u_mode_head_length);
+ }
+ break;
+
+ case GCRY_CIPHER_MODE_POLY1305:
+ memset (&c->u_mode.poly1305, 0, sizeof c->u_mode.poly1305);
+ break;
+
+ case GCRY_CIPHER_MODE_CCM:
+ memset (&c->u_mode.ccm, 0, sizeof c->u_mode.ccm);
+ break;
+
+ case GCRY_CIPHER_MODE_OCB:
+ /* Do not clear precalculated L-values */
+ {
+ byte *u_mode_head_pos = (void *)&c->u_mode.ocb;
+ byte *u_mode_tail_pos = (void *)&c->u_mode.ocb.tag;
+ size_t u_mode_head_length = u_mode_tail_pos - u_mode_head_pos;
+ size_t u_mode_tail_length = sizeof(c->u_mode.ocb) - u_mode_head_length;
+
+ memset (u_mode_tail_pos, 0, u_mode_tail_length);
+
+ /* Setup default taglen. */
+ c->u_mode.ocb.taglen = 16;
+ }
+ break;
+
+ case GCRY_CIPHER_MODE_XTS:
+ memcpy (c->u_mode.xts.tweak_context,
+ c->u_mode.xts.tweak_context + c->spec->contextsize,
+ c->spec->contextsize);
+ break;
+
+ default:
+ break; /* u_mode unused by other modes. */
+ }
+}
+
+
+
+static gcry_err_code_t
+do_ecb_crypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen,
+ gcry_cipher_encrypt_t crypt_fn)
+{
+ unsigned int blocksize = c->spec->blocksize;
+ size_t n, nblocks;
+ unsigned int burn, nburn;
+
+ if (outbuflen < inbuflen)
+ return GPG_ERR_BUFFER_TOO_SHORT;
+ if ((inbuflen % blocksize))
+ return GPG_ERR_INV_LENGTH;
+
+ nblocks = inbuflen / blocksize;
+ burn = 0;
+
+ for (n=0; n < nblocks; n++ )
+ {
+ nburn = crypt_fn (&c->context.c, outbuf, inbuf);
+ burn = nburn > burn ? nburn : burn;
+ inbuf += blocksize;
+ outbuf += blocksize;
+ }
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+ return 0;
+}
+
+static gcry_err_code_t
+do_ecb_encrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ return do_ecb_crypt (c, outbuf, outbuflen, inbuf, inbuflen, c->spec->encrypt);
+}
+
+static gcry_err_code_t
+do_ecb_decrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ return do_ecb_crypt (c, outbuf, outbuflen, inbuf, inbuflen, c->spec->decrypt);
+}
+
+
+static gcry_err_code_t
+do_stream_encrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ (void)outbuflen;
+ c->spec->stencrypt (&c->context.c, outbuf, (void *)inbuf, inbuflen);
+ return 0;
+}
+
+static gcry_err_code_t
+do_stream_decrypt (gcry_cipher_hd_t c,
+ unsigned char *outbuf, size_t outbuflen,
+ const unsigned char *inbuf, size_t inbuflen)
+{
+ (void)outbuflen;
+ c->spec->stdecrypt (&c->context.c, outbuf, (void *)inbuf, inbuflen);
+ return 0;
+}
+
+
+static gcry_err_code_t
+do_encrypt_none_unknown (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen)
+{
+ gcry_err_code_t rc;
+
+ (void)outbuflen;
+
+ switch (c->mode)
+ {
+ case GCRY_CIPHER_MODE_CMAC:
+ rc = GPG_ERR_INV_CIPHER_MODE;
+ break;
+
+ case GCRY_CIPHER_MODE_NONE:
+ if (fips_mode () || !_gcry_get_debug_flag (0))
+ {
+ fips_signal_error ("cipher mode NONE used");
+ rc = GPG_ERR_INV_CIPHER_MODE;
+ }
+ else
+ {
+ if (inbuf != outbuf)
+ memmove (outbuf, inbuf, inbuflen);
+ rc = 0;
+ }
+ break;
+
+ default:
+ log_fatal ("cipher_encrypt: invalid mode %d\n", c->mode );
+ rc = GPG_ERR_INV_CIPHER_MODE;
+ break;
+ }
+
+ return rc;
+}
+
+static gcry_err_code_t
+do_decrypt_none_unknown (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
+ const byte *inbuf, size_t inbuflen)
+{
+ gcry_err_code_t rc;
+
+ (void)outbuflen;
+
+ switch (c->mode)
+ {
+ case GCRY_CIPHER_MODE_CMAC:
+ rc = GPG_ERR_INV_CIPHER_MODE;
+ break;
+
+ case GCRY_CIPHER_MODE_NONE:
+ if (fips_mode () || !_gcry_get_debug_flag (0))
+ {
+ fips_signal_error ("cipher mode NONE used");
+ rc = GPG_ERR_INV_CIPHER_MODE;
+ }
+ else
+ {
+ if (inbuf != outbuf)
+ memmove (outbuf, inbuf, inbuflen);
+ rc = 0;
+ }
+ break;
+
+ default:
+ log_fatal ("cipher_decrypt: invalid mode %d\n", c->mode );
+ rc = GPG_ERR_INV_CIPHER_MODE;
+ break;
+ }
+
+ return rc;
+}
+
+
+/****************
+ * Encrypt IN and write it to OUT. If IN is NULL, in-place encryption has
+ * been requested.
+ */
+gcry_err_code_t
+_gcry_cipher_encrypt (gcry_cipher_hd_t h, void *out, size_t outsize,
+ const void *in, size_t inlen)
+{
+ gcry_err_code_t rc;
+
+ if (!in) /* Caller requested in-place encryption. */
+ {
+ in = out;
+ inlen = outsize;
+ }
+
+ if (h->mode != GCRY_CIPHER_MODE_NONE && !h->marks.key)
+ {
+ log_error ("cipher_encrypt: key not set\n");
+ return GPG_ERR_MISSING_KEY;
+ }
+
+ rc = h->mode_ops.encrypt (h, out, outsize, in, inlen);
+
+ /* Failsafe: Make sure that the plaintext will never make it into
+ OUT if the encryption returned an error. */
+ if (rc && out)
+ memset (out, 0x42, outsize);
+
+ return rc;
+}
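+
+/* Usage sketch (illustrative only): passing NULL for IN requests in-place
+   encryption, i.e. OUT already holds the plaintext and is overwritten
+   with the ciphertext.  */
+#if 0
+static gcry_err_code_t
+example_encrypt_in_place (gcry_cipher_hd_t hd, void *buf, size_t buflen)
+{
+  return _gcry_cipher_encrypt (hd, buf, buflen, NULL, 0);
+}
+#endif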
+
+
+/****************
+ * Decrypt IN and write it to OUT.  If IN is NULL, in-place decryption has
+ * been requested.
+ */
+gcry_err_code_t
+_gcry_cipher_decrypt (gcry_cipher_hd_t h, void *out, size_t outsize,
+ const void *in, size_t inlen)
+{
+  if (!in) /* Caller requested in-place decryption. */
+ {
+ in = out;
+ inlen = outsize;
+ }
+
+ if (h->mode != GCRY_CIPHER_MODE_NONE && !h->marks.key)
+ {
+ log_error ("cipher_decrypt: key not set\n");
+ return GPG_ERR_MISSING_KEY;
+ }
+
+ return h->mode_ops.decrypt (h, out, outsize, in, inlen);
+}
+
+
+/****************
+ * Used for PGP's somewhat strange CFB mode. Only works if
+ * the corresponding flag is set.
+ */
+static void
+cipher_sync (gcry_cipher_hd_t c)
+{
+ if ((c->flags & GCRY_CIPHER_ENABLE_SYNC) && c->unused)
+ {
+ memmove (c->u_iv.iv + c->unused,
+ c->u_iv.iv, c->spec->blocksize - c->unused);
+ memcpy (c->u_iv.iv,
+ c->lastiv + c->spec->blocksize - c->unused, c->unused);
+ c->unused = 0;
+ }
+}
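+
+/* Usage sketch (illustrative only): the sync operation is reached through
+   GCRYCTL_CFB_SYNC and only has an effect if the handle was opened with
+   the GCRY_CIPHER_ENABLE_SYNC flag.  */
+#if 0
+static gcry_err_code_t
+example_cfb_sync (gcry_cipher_hd_t hd)
+{
+  return _gcry_cipher_ctl (hd, GCRYCTL_CFB_SYNC, NULL, 0);
+}
+#endif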
+
+
+gcry_err_code_t
+_gcry_cipher_setkey (gcry_cipher_hd_t hd, const void *key, size_t keylen)
+{
+ return cipher_setkey (hd, (void*)key, keylen);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_setiv (gcry_cipher_hd_t hd, const void *iv, size_t ivlen)
+{
+ return hd->mode_ops.setiv (hd, iv, ivlen);
+}
+
+
+/* Set counter for CTR mode. (CTR,CTRLEN) must denote a buffer of
+ block size length, or (NULL,0) to set the CTR to the all-zero
+ block. */
+gpg_err_code_t
+_gcry_cipher_setctr (gcry_cipher_hd_t hd, const void *ctr, size_t ctrlen)
+{
+ if (ctr && ctrlen == hd->spec->blocksize)
+ {
+ memcpy (hd->u_ctr.ctr, ctr, hd->spec->blocksize);
+ hd->unused = 0;
+ }
+ else if (!ctr || !ctrlen)
+ {
+ memset (hd->u_ctr.ctr, 0, hd->spec->blocksize);
+ hd->unused = 0;
+ }
+ else
+ return GPG_ERR_INV_ARG;
+
+ return 0;
+}
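+
+/* Usage sketch (illustrative only): the counter buffer must match the
+   block size of the algorithm; (NULL, 0) resets it to the all-zero
+   block.  For a 16 byte block cipher such as AES:  */
+#if 0
+static gpg_err_code_t
+example_set_ctr (gcry_cipher_hd_t hd, const unsigned char ctr[16])
+{
+  return _gcry_cipher_setctr (hd, ctr, 16);
+}
+#endif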
+
+gpg_err_code_t
+_gcry_cipher_getctr (gcry_cipher_hd_t hd, void *ctr, size_t ctrlen)
+{
+ if (ctr && ctrlen == hd->spec->blocksize)
+ memcpy (ctr, hd->u_ctr.ctr, hd->spec->blocksize);
+ else
+ return GPG_ERR_INV_ARG;
+
+ return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_authenticate (gcry_cipher_hd_t hd, const void *abuf,
+ size_t abuflen)
+{
+ gcry_err_code_t rc;
+
+ if (hd->mode_ops.authenticate)
+ {
+ rc = hd->mode_ops.authenticate (hd, abuf, abuflen);
+ }
+ else
+ {
+ log_error ("gcry_cipher_authenticate: invalid mode %d\n", hd->mode);
+ rc = GPG_ERR_INV_CIPHER_MODE;
+ }
+
+ return rc;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gettag (gcry_cipher_hd_t hd, void *outtag, size_t taglen)
+{
+ gcry_err_code_t rc;
+
+ if (hd->mode_ops.get_tag)
+ {
+ rc = hd->mode_ops.get_tag (hd, outtag, taglen);
+ }
+ else
+ {
+ log_error ("gcry_cipher_gettag: invalid mode %d\n", hd->mode);
+ rc = GPG_ERR_INV_CIPHER_MODE;
+ }
+
+ return rc;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_checktag (gcry_cipher_hd_t hd, const void *intag, size_t taglen)
+{
+ gcry_err_code_t rc;
+
+ if (hd->mode_ops.check_tag)
+ {
+ rc = hd->mode_ops.check_tag (hd, intag, taglen);
+ }
+ else
+ {
+ log_error ("gcry_cipher_checktag: invalid mode %d\n", hd->mode);
+ rc = GPG_ERR_INV_CIPHER_MODE;
+ }
+
+ return rc;
+}
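+
+/* Usage sketch (illustrative only): a typical sequence for the AEAD modes
+   wired up below -- set the nonce, feed the additional data, encrypt in
+   place and finally fetch the tag.  (CCM additionally requires
+   GCRYCTL_SET_CCM_LENGTHS before any data is processed.)  */
+#if 0
+static gcry_err_code_t
+example_aead_seal (gcry_cipher_hd_t hd,
+                   const void *nonce, size_t noncelen,
+                   const void *aad, size_t aadlen,
+                   void *buf, size_t buflen,
+                   void *tag, size_t taglen)
+{
+  gcry_err_code_t rc;
+
+  rc = _gcry_cipher_setiv (hd, nonce, noncelen);
+  if (!rc)
+    rc = _gcry_cipher_authenticate (hd, aad, aadlen);
+  if (!rc)
+    rc = _gcry_cipher_encrypt (hd, buf, buflen, NULL, 0);
+  if (!rc)
+    rc = _gcry_cipher_gettag (hd, tag, taglen);
+  return rc;
+}
+#endif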
+
+
+
+static void
+_gcry_cipher_setup_mode_ops(gcry_cipher_hd_t c, int mode)
+{
+ /* Setup encryption and decryption routines. */
+ switch (mode)
+ {
+ case GCRY_CIPHER_MODE_STREAM:
+ c->mode_ops.encrypt = do_stream_encrypt;
+ c->mode_ops.decrypt = do_stream_decrypt;
+ break;
+
+ case GCRY_CIPHER_MODE_ECB:
+ c->mode_ops.encrypt = do_ecb_encrypt;
+ c->mode_ops.decrypt = do_ecb_decrypt;
+ break;
+
+ case GCRY_CIPHER_MODE_CBC:
+ if (!(c->flags & GCRY_CIPHER_CBC_CTS))
+ {
+ c->mode_ops.encrypt = _gcry_cipher_cbc_encrypt;
+ c->mode_ops.decrypt = _gcry_cipher_cbc_decrypt;
+ }
+ else
+ {
+ c->mode_ops.encrypt = _gcry_cipher_cbc_cts_encrypt;
+ c->mode_ops.decrypt = _gcry_cipher_cbc_cts_decrypt;
+ }
+ break;
+
+ case GCRY_CIPHER_MODE_CFB:
+ c->mode_ops.encrypt = _gcry_cipher_cfb_encrypt;
+ c->mode_ops.decrypt = _gcry_cipher_cfb_decrypt;
+ break;
+
+ case GCRY_CIPHER_MODE_CFB8:
+ c->mode_ops.encrypt = _gcry_cipher_cfb8_encrypt;
+ c->mode_ops.decrypt = _gcry_cipher_cfb8_decrypt;
+ break;
+
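+    /* OFB and CTR turn the block cipher into an XOR stream, hence the
+       same routine serves for both encryption and decryption.  */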
+ case GCRY_CIPHER_MODE_OFB:
+ c->mode_ops.encrypt = _gcry_cipher_ofb_encrypt;
+ c->mode_ops.decrypt = _gcry_cipher_ofb_encrypt;
+ break;
+
+ case GCRY_CIPHER_MODE_CTR:
+ c->mode_ops.encrypt = _gcry_cipher_ctr_encrypt;
+ c->mode_ops.decrypt = _gcry_cipher_ctr_encrypt;
+ break;
+
+ case GCRY_CIPHER_MODE_AESWRAP:
+ c->mode_ops.encrypt = _gcry_cipher_aeswrap_encrypt;
+ c->mode_ops.decrypt = _gcry_cipher_aeswrap_decrypt;
+ break;
+
+ case GCRY_CIPHER_MODE_CCM:
+ c->mode_ops.encrypt = _gcry_cipher_ccm_encrypt;
+ c->mode_ops.decrypt = _gcry_cipher_ccm_decrypt;
+ break;
+
+ case GCRY_CIPHER_MODE_EAX:
+ c->mode_ops.encrypt = _gcry_cipher_eax_encrypt;
+ c->mode_ops.decrypt = _gcry_cipher_eax_decrypt;
+ break;
+
+ case GCRY_CIPHER_MODE_GCM:
+ c->mode_ops.encrypt = _gcry_cipher_gcm_encrypt;
+ c->mode_ops.decrypt = _gcry_cipher_gcm_decrypt;
+ break;
+
+ case GCRY_CIPHER_MODE_POLY1305:
+ c->mode_ops.encrypt = _gcry_cipher_poly1305_encrypt;
+ c->mode_ops.decrypt = _gcry_cipher_poly1305_decrypt;
+ break;
+
+ case GCRY_CIPHER_MODE_OCB:
+ c->mode_ops.encrypt = _gcry_cipher_ocb_encrypt;
+ c->mode_ops.decrypt = _gcry_cipher_ocb_decrypt;
+ break;
+
+ case GCRY_CIPHER_MODE_XTS:
+ c->mode_ops.encrypt = _gcry_cipher_xts_encrypt;
+ c->mode_ops.decrypt = _gcry_cipher_xts_decrypt;
+ break;
+
+ default:
+ c->mode_ops.encrypt = do_encrypt_none_unknown;
+ c->mode_ops.decrypt = do_decrypt_none_unknown;
+ break;
+ }
+
+ /* Setup IV setting routine. */
+ switch (mode)
+ {
+ case GCRY_CIPHER_MODE_CCM:
+ c->mode_ops.setiv = _gcry_cipher_ccm_set_nonce;
+ break;
+
+ case GCRY_CIPHER_MODE_EAX:
+ c->mode_ops.setiv = _gcry_cipher_eax_set_nonce;
+ break;
+
+ case GCRY_CIPHER_MODE_GCM:
+ c->mode_ops.setiv = _gcry_cipher_gcm_setiv;
+ break;
+
+ case GCRY_CIPHER_MODE_POLY1305:
+ c->mode_ops.setiv = _gcry_cipher_poly1305_setiv;
+ break;
+
+ case GCRY_CIPHER_MODE_OCB:
+ c->mode_ops.setiv = _gcry_cipher_ocb_set_nonce;
+ break;
+
+ default:
+ c->mode_ops.setiv = cipher_setiv;
+ break;
+ }
+
+
+ /* Setup authentication routines for AEAD modes. */
+ switch (mode)
+ {
+ case GCRY_CIPHER_MODE_CCM:
+ c->mode_ops.authenticate = _gcry_cipher_ccm_authenticate;
+ c->mode_ops.get_tag = _gcry_cipher_ccm_get_tag;
+ c->mode_ops.check_tag = _gcry_cipher_ccm_check_tag;
+ break;
+
+ case GCRY_CIPHER_MODE_CMAC:
+ c->mode_ops.authenticate = _gcry_cipher_cmac_authenticate;
+ c->mode_ops.get_tag = _gcry_cipher_cmac_get_tag;
+ c->mode_ops.check_tag = _gcry_cipher_cmac_check_tag;
+ break;
+
+ case GCRY_CIPHER_MODE_EAX:
+ c->mode_ops.authenticate = _gcry_cipher_eax_authenticate;
+ c->mode_ops.get_tag = _gcry_cipher_eax_get_tag;
+ c->mode_ops.check_tag = _gcry_cipher_eax_check_tag;
+ break;
+
+ case GCRY_CIPHER_MODE_GCM:
+ c->mode_ops.authenticate = _gcry_cipher_gcm_authenticate;
+ c->mode_ops.get_tag = _gcry_cipher_gcm_get_tag;
+ c->mode_ops.check_tag = _gcry_cipher_gcm_check_tag;
+ break;
+
+ case GCRY_CIPHER_MODE_POLY1305:
+ c->mode_ops.authenticate = _gcry_cipher_poly1305_authenticate;
+ c->mode_ops.get_tag = _gcry_cipher_poly1305_get_tag;
+ c->mode_ops.check_tag = _gcry_cipher_poly1305_check_tag;
+ break;
+
+ case GCRY_CIPHER_MODE_OCB:
+ c->mode_ops.authenticate = _gcry_cipher_ocb_authenticate;
+ c->mode_ops.get_tag = _gcry_cipher_ocb_get_tag;
+ c->mode_ops.check_tag = _gcry_cipher_ocb_check_tag;
+ break;
+
+ default:
+ c->mode_ops.authenticate = NULL;
+ c->mode_ops.get_tag = NULL;
+ c->mode_ops.check_tag = NULL;
+ break;
+ }
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ctl (gcry_cipher_hd_t h, int cmd, void *buffer, size_t buflen)
+{
+ gcry_err_code_t rc = 0;
+
+ switch (cmd)
+ {
+ case GCRYCTL_RESET:
+ cipher_reset (h);
+ break;
+
+ case GCRYCTL_FINALIZE:
+ if (!h || buffer || buflen)
+ return GPG_ERR_INV_ARG;
+ h->marks.finalize = 1;
+ break;
+
+ case GCRYCTL_CFB_SYNC:
+ cipher_sync( h );
+ break;
+
+    case GCRYCTL_SET_CBC_CTS:
+      if (buflen)
+        {
+          if (h->flags & GCRY_CIPHER_CBC_MAC)
+            rc = GPG_ERR_INV_FLAG;
+          else
+            h->flags |= GCRY_CIPHER_CBC_CTS;
+        }
+      else
+        h->flags &= ~GCRY_CIPHER_CBC_CTS;
+      break;
+
+    case GCRYCTL_SET_CBC_MAC:
+      if (buflen)
+        {
+          if (h->flags & GCRY_CIPHER_CBC_CTS)
+            rc = GPG_ERR_INV_FLAG;
+          else
+            h->flags |= GCRY_CIPHER_CBC_MAC;
+        }
+      else
+        h->flags &= ~GCRY_CIPHER_CBC_MAC;
+      break;
+
+ case GCRYCTL_SET_CCM_LENGTHS:
+ {
+ u64 params[3];
+ size_t encryptedlen;
+ size_t aadlen;
+ size_t authtaglen;
+
+ if (h->mode != GCRY_CIPHER_MODE_CCM)
+ return GPG_ERR_INV_CIPHER_MODE;
+
+ if (!buffer || buflen != 3 * sizeof(u64))
+ return GPG_ERR_INV_ARG;
+
+ /* This command is used to pass additional length parameters needed
+ by CCM mode to initialize CBC-MAC. */
+ memcpy (params, buffer, sizeof(params));
+ encryptedlen = params[0];
+ aadlen = params[1];
+ authtaglen = params[2];
+
+ rc = _gcry_cipher_ccm_set_lengths (h, encryptedlen, aadlen, authtaglen);
+ }
+ break;
+
+ case GCRYCTL_SET_TAGLEN:
+ if (!h || !buffer || buflen != sizeof(int) )
+ return GPG_ERR_INV_ARG;
+ switch (h->mode)
+ {
+ case GCRY_CIPHER_MODE_OCB:
+ switch (*(int*)buffer)
+ {
+ case 8: case 12: case 16:
+ h->u_mode.ocb.taglen = *(int*)buffer;
+ break;
+ default:
+ rc = GPG_ERR_INV_LENGTH; /* Invalid tag length. */
+ break;
+ }
+ break;
+
+ default:
+          rc = GPG_ERR_INV_CIPHER_MODE;
+ break;
+ }
+ break;
+
+ case GCRYCTL_DISABLE_ALGO:
+ /* This command expects NULL for H and BUFFER to point to an
+ integer with the algo number. */
+ if( h || !buffer || buflen != sizeof(int) )
+ return GPG_ERR_CIPHER_ALGO;
+ disable_cipher_algo( *(int*)buffer );
+ break;
+
+ case PRIV_CIPHERCTL_DISABLE_WEAK_KEY: /* (private) */
+ if (h->spec->set_extra_info)
+ rc = h->spec->set_extra_info
+ (&h->context.c, CIPHER_INFO_NO_WEAK_KEY, NULL, 0);
+ else
+ rc = GPG_ERR_NOT_SUPPORTED;
+ break;
+
+ case PRIV_CIPHERCTL_GET_INPUT_VECTOR: /* (private) */
+ /* This is the input block as used in CFB and OFB mode which has
+ initially been set as IV. The returned format is:
+ 1 byte Actual length of the block in bytes.
+ n byte The block.
+ If the provided buffer is too short, an error is returned. */
+ if (buflen < (1 + h->spec->blocksize))
+ rc = GPG_ERR_TOO_SHORT;
+ else
+ {
+ unsigned char *ivp;
+ unsigned char *dst = buffer;
+ int n = h->unused;
+
+ if (!n)
+ n = h->spec->blocksize;
+ gcry_assert (n <= h->spec->blocksize);
+ *dst++ = n;
+ ivp = h->u_iv.iv + h->spec->blocksize - n;
+ while (n--)
+ *dst++ = *ivp++;
+ }
+ break;
+
+ case GCRYCTL_SET_SBOX:
+ if (h->spec->set_extra_info)
+ rc = h->spec->set_extra_info
+ (&h->context.c, GCRYCTL_SET_SBOX, buffer, buflen);
+ else
+ rc = GPG_ERR_NOT_SUPPORTED;
+ break;
+
+ case GCRYCTL_SET_ALLOW_WEAK_KEY:
+      /* Expecting BUFFER to be NULL and BUFLEN to be the on/off flag
+         (0 or 1).  */
+ if (!h || buffer || buflen > 1)
+ return GPG_ERR_CIPHER_ALGO;
+ h->marks.allow_weak_key = buflen ? 1 : 0;
+ break;
+
+ default:
+ rc = GPG_ERR_INV_OP;
+ }
+
+ return rc;
+}
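+
+/* Usage sketch (illustrative only): GCRYCTL_SET_CCM_LENGTHS expects a
+   buffer with three u64 values -- the length of the data to be encrypted,
+   the length of the additional data and the length of the tag.  */
+#if 0
+static gcry_err_code_t
+example_set_ccm_lengths (gcry_cipher_hd_t hd, u64 encryptedlen, u64 aadlen,
+                         u64 authtaglen)
+{
+  u64 params[3];
+
+  params[0] = encryptedlen;
+  params[1] = aadlen;
+  params[2] = authtaglen;
+  return _gcry_cipher_ctl (hd, GCRYCTL_SET_CCM_LENGTHS,
+                           params, sizeof params);
+}
+#endif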
+
+
+/* Return information about the cipher handle H. CMD is the kind of
+ * information requested.
+ *
+ * CMD may be one of:
+ *
+ * GCRYCTL_GET_TAGLEN:
+ * Return the length of the tag for an AE algorithm mode. An
+ * error is returned for modes which do not support a tag.
+ * BUFFER must be given as NULL. On success the result is stored
+ * at NBYTES. The taglen is returned in bytes.
+ *
+ * The function returns 0 on success or an error code.
+ */
+gcry_err_code_t
+_gcry_cipher_info (gcry_cipher_hd_t h, int cmd, void *buffer, size_t *nbytes)
+{
+ gcry_err_code_t rc = 0;
+
+ switch (cmd)
+ {
+ case GCRYCTL_GET_TAGLEN:
+ if (!h || buffer || !nbytes)
+ rc = GPG_ERR_INV_ARG;
+ else
+ {
+ switch (h->mode)
+ {
+ case GCRY_CIPHER_MODE_OCB:
+ *nbytes = h->u_mode.ocb.taglen;
+ break;
+
+ case GCRY_CIPHER_MODE_CCM:
+ *nbytes = h->u_mode.ccm.authlen;
+ break;
+
+ case GCRY_CIPHER_MODE_EAX:
+ *nbytes = h->spec->blocksize;
+ break;
+
+ case GCRY_CIPHER_MODE_GCM:
+ *nbytes = GCRY_GCM_BLOCK_LEN;
+ break;
+
+ case GCRY_CIPHER_MODE_POLY1305:
+ *nbytes = POLY1305_TAGLEN;
+ break;
+
+ default:
+ rc = GPG_ERR_INV_CIPHER_MODE;
+ break;
+ }
+ }
+ break;
+
+ default:
+ rc = GPG_ERR_INV_OP;
+ }
+
+ return rc;
+}
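+
+/* Usage sketch (illustrative only): query the tag length of an AEAD
+   handle; BUFFER must be NULL and the length is returned through
+   NBYTES.  */
+#if 0
+static gcry_err_code_t
+example_get_taglen (gcry_cipher_hd_t hd, size_t *r_taglen)
+{
+  return _gcry_cipher_info (hd, GCRYCTL_GET_TAGLEN, NULL, r_taglen);
+}
+#endif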
+
+/* Return information about the given cipher algorithm ALGO.
+
+   WHAT selects the kind of information returned:
+
+ GCRYCTL_GET_KEYLEN:
+ Return the length of the key. If the algorithm ALGO
+ supports multiple key lengths, the maximum supported key length
+ is returned. The key length is returned as number of octets.
+ BUFFER and NBYTES must be zero.
+
+ GCRYCTL_GET_BLKLEN:
+ Return the blocklength of the algorithm ALGO counted in octets.
+ BUFFER and NBYTES must be zero.
+
+ GCRYCTL_TEST_ALGO:
+ Returns 0 if the specified algorithm ALGO is available for use.
+ BUFFER and NBYTES must be zero.
+
+   Note: Because this function is in most cases used to return an
+   integer value, we can make it easier for the caller to just look at
+   the return value.  The caller will in all cases consult the value
+   and thereby detect whether an error occurred or not (e.g. while
+   checking the block size).
+ */
+gcry_err_code_t
+_gcry_cipher_algo_info (int algo, int what, void *buffer, size_t *nbytes)
+{
+ gcry_err_code_t rc = 0;
+ unsigned int ui;
+
+ switch (what)
+ {
+ case GCRYCTL_GET_KEYLEN:
+ if (buffer || (! nbytes))
+ rc = GPG_ERR_CIPHER_ALGO;
+ else
+ {
+ ui = cipher_get_keylen (algo);
+ if ((ui > 0) && (ui <= 512))
+ *nbytes = (size_t) ui / 8;
+ else
+ /* The only reason for an error is an invalid algo. */
+ rc = GPG_ERR_CIPHER_ALGO;
+ }
+ break;
+
+ case GCRYCTL_GET_BLKLEN:
+ if (buffer || (! nbytes))
+ rc = GPG_ERR_CIPHER_ALGO;
+ else
+ {
+ ui = cipher_get_blocksize (algo);
+ if ((ui > 0) && (ui < 10000))
+ *nbytes = ui;
+ else
+ {
+ /* The only reason is an invalid algo or a strange
+ blocksize. */
+ rc = GPG_ERR_CIPHER_ALGO;
+ }
+ }
+ break;
+
+ case GCRYCTL_TEST_ALGO:
+ if (buffer || nbytes)
+ rc = GPG_ERR_INV_ARG;
+ else
+ rc = check_cipher_algo (algo);
+ break;
+
+ default:
+ rc = GPG_ERR_INV_OP;
+ }
+
+ return rc;
+}
+
+
+/* This function returns the length of the key for algorithm ALGO.  If the
+ algorithm supports multiple key lengths, the maximum supported key
+ length is returned. On error 0 is returned. The key length is
+ returned as number of octets.
+
+   This is a convenience function which should be preferred over
+ gcry_cipher_algo_info because it allows for proper type
+ checking. */
+size_t
+_gcry_cipher_get_algo_keylen (int algo)
+{
+ size_t n;
+
+ if (_gcry_cipher_algo_info (algo, GCRYCTL_GET_KEYLEN, NULL, &n))
+ n = 0;
+ return n;
+}
+
+
+/* This function returns the blocklength of the algorithm ALGO
+ counted in octets. On error 0 is returned.
+
+   This is a convenience function which should be preferred over
+ gcry_cipher_algo_info because it allows for proper type
+ checking. */
+size_t
+_gcry_cipher_get_algo_blklen (int algo)
+{
+ size_t n;
+
+ if (_gcry_cipher_algo_info( algo, GCRYCTL_GET_BLKLEN, NULL, &n))
+ n = 0;
+ return n;
+}
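+
+/* Illustrative example only: for AES-256 the convenience wrappers return
+   a key length of 32 octets and a block length of 16 octets.  */
+#if 0
+static void
+example_query_aes256 (void)
+{
+  size_t keylen = _gcry_cipher_get_algo_keylen (GCRY_CIPHER_AES256); /* 32 */
+  size_t blklen = _gcry_cipher_get_algo_blklen (GCRY_CIPHER_AES256); /* 16 */
+
+  (void)keylen;
+  (void)blklen;
+}
+#endif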
+
+
+/* Explicitly initialize this module. */
+gcry_err_code_t
+_gcry_cipher_init (void)
+{
+ if (fips_mode())
+ {
+      /* Disable algorithms that are disallowed in FIPS mode.  */
+ int idx;
+ gcry_cipher_spec_t *spec;
+
+ for (idx = 0; (spec = cipher_list[idx]); idx++)
+ if (!spec->flags.fips)
+ spec->flags.disabled = 1;
+ }
+
+ return 0;
+}
+
+
+/* Run the selftests for cipher algorithm ALGO with optional reporting
+ function REPORT. */
+gpg_error_t
+_gcry_cipher_selftest (int algo, int extended, selftest_report_func_t report)
+{
+ gcry_err_code_t ec = 0;
+ gcry_cipher_spec_t *spec;
+
+ spec = spec_from_algo (algo);
+ if (spec && !spec->flags.disabled && spec->selftest)
+ ec = spec->selftest (algo, extended, report);
+ else
+ {
+ ec = GPG_ERR_CIPHER_ALGO;
+ if (report)
+ report ("cipher", algo, "module",
+ (spec && !spec->flags.disabled)?
+ "no selftest available" :
+ spec? "algorithm disabled" : "algorithm not found");
+ }
+
+ return gpg_error (ec);
+}
diff --git a/comm/third_party/libgcrypt/cipher/crc-armv8-aarch64-ce.S b/comm/third_party/libgcrypt/cipher/crc-armv8-aarch64-ce.S
new file mode 100644
index 0000000000..060abdfe9a
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/crc-armv8-aarch64-ce.S
@@ -0,0 +1,497 @@
+/* crc-armv8-aarch64-ce.S - ARMv8/CE PMULL accelerated CRC implementation
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__) && \
+ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+
+.cpu generic+simd+crypto
+
+.text
+
+
+/* Structure of crc32_consts_s */
+
+#define consts_k(idx) ((idx) * 8)
+#define consts_my_p(idx) (consts_k(6) + (idx) * 8)
+
+/* Constants */
+
+.align 6
+.Lcrc32_constants:
+.Lcrc32_partial_fold_input_mask:
+ .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+.Lcrc32_refl_shuf_shift:
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+.Lcrc32_shuf_shift:
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+.Lcrc32_bswap_shuf:
+ .byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
+ .byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
+
+/*
+ * void _gcry_crc32r_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+ * const struct crc32_consts_s *consts);
+ */
+.align 3
+.globl _gcry_crc32r_armv8_ce_bulk
+ELF(.type _gcry_crc32r_armv8_ce_bulk,%function;)
+_gcry_crc32r_armv8_ce_bulk:
+ /* input:
+ * x0: pcrc
+ * x1: inbuf
+ * x2: inlen
+ * x3: consts
+ */
+ CFI_STARTPROC()
+
+ GET_DATA_POINTER(x7, .Lcrc32_constants)
+ add x9, x3, #consts_k(5 - 1)
+ cmp x2, #128
+
+ b.lo .Lcrc32r_fold_by_one_setup
+
+ eor v4.16b, v4.16b, v4.16b
+ add x4, x3, #consts_k(1 - 1)
+ ld1 {v4.s}[0], [x0] /* load pcrc */
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* load 64 bytes of input */
+ sub x2, x2, #64
+ ld1 {v6.16b}, [x4]
+ eor v0.16b, v0.16b, v4.16b
+
+ add x4, x3, #consts_k(3 - 1)
+ add x5, x3, #consts_my_p(0)
+
+.Lcrc32r_fold_by_four:
+
+ /* Fold by 4. */
+ ld1 {v16.16b-v19.16b}, [x1], #64 /* load 64 bytes of input */
+ sub x2, x2, #64
+ pmull v20.1q, v0.1d, v6.1d
+ pmull v21.1q, v1.1d, v6.1d
+ pmull v22.1q, v2.1d, v6.1d
+ pmull v23.1q, v3.1d, v6.1d
+ cmp x2, #64
+ pmull2 v24.1q, v0.2d, v6.2d
+ pmull2 v25.1q, v1.2d, v6.2d
+ pmull2 v26.1q, v2.2d, v6.2d
+ pmull2 v27.1q, v3.2d, v6.2d
+ eor v0.16b, v20.16b, v16.16b
+ eor v1.16b, v21.16b, v17.16b
+ eor v2.16b, v22.16b, v18.16b
+ eor v3.16b, v23.16b, v19.16b
+ eor v0.16b, v0.16b, v24.16b
+ eor v1.16b, v1.16b, v25.16b
+ eor v2.16b, v2.16b, v26.16b
+ eor v3.16b, v3.16b, v27.16b
+ b.hs .Lcrc32r_fold_by_four
+
+ ld1 {v6.16b}, [x4]
+ ld1 {v5.16b}, [x5]
+
+ cmp x2, #16
+
+ /* Fold 4 to 1. */
+
+ pmull v16.1q, v0.1d, v6.1d
+ pmull2 v4.1q, v0.2d, v6.2d
+ eor v0.16b, v16.16b, v1.16b
+ eor v0.16b, v0.16b, v4.16b
+
+ pmull v16.1q, v0.1d, v6.1d
+ pmull2 v4.1q, v0.2d, v6.2d
+ eor v0.16b, v16.16b, v2.16b
+ eor v0.16b, v0.16b, v4.16b
+
+ pmull v16.1q, v0.1d, v6.1d
+ pmull2 v4.1q, v0.2d, v6.2d
+ eor v0.16b, v16.16b, v3.16b
+ eor v0.16b, v0.16b, v4.16b
+
+ b.lo .Lcrc32r_fold_by_one_done
+ b .Lcrc32r_fold_by_one
+
+.Lcrc32r_fold_by_one_setup:
+
+ eor v1.16b, v1.16b, v1.16b
+ add x4, x3, #consts_k(3 - 1)
+ add x5, x3, #consts_my_p(0)
+ sub x2, x2, #16
+ ld1 {v1.s}[0], [x0] /* load pcrc */
+ ld1 {v0.16b}, [x1], #16 /* load 16 bytes of input */
+ cmp x2, #16
+ ld1 {v6.16b}, [x4] /* load k3k4 */
+ ld1 {v5.16b}, [x5] /* load my_p */
+ eor v0.16b, v0.16b, v1.16b
+ b.lo .Lcrc32r_fold_by_one_done
+
+.Lcrc32r_fold_by_one:
+ sub x2, x2, #16
+ ld1 {v2.16b}, [x1], #16 /* load 16 bytes of input */
+ pmull v3.1q, v0.1d, v6.1d
+ pmull2 v1.1q, v0.2d, v6.2d
+ cmp x2, #16
+ eor v0.16b, v3.16b, v2.16b
+ eor v0.16b, v0.16b, v1.16b
+
+ b.hs .Lcrc32r_fold_by_one
+
+.Lcrc32r_fold_by_one_done:
+
+ cmp x2, #0
+ b.eq .Lcrc32r_final_fold
+
+ /* Partial fold. */
+
+ add x4, x7, #.Lcrc32_refl_shuf_shift - .Lcrc32_constants
+ add x5, x7, #.Lcrc32_refl_shuf_shift - .Lcrc32_constants + 16
+ add x6, x7, #.Lcrc32_partial_fold_input_mask - .Lcrc32_constants
+ sub x8, x2, #16
+ add x4, x4, x2
+ add x5, x5, x2
+ add x6, x6, x2
+ add x8, x1, x8
+
+ /* Load last input and add padding zeros. */
+ ld1 {v4.16b}, [x4]
+ eor x2, x2, x2
+ ld1 {v3.16b}, [x5]
+ ld1 {v2.16b}, [x6]
+ tbl v30.16b, {v0.16b}, v4.16b
+ ld1 {v4.16b}, [x8]
+ tbl v1.16b, {v0.16b}, v3.16b
+
+ pmull v0.1q, v30.1d, v6.1d
+ and v2.16b, v2.16b, v4.16b
+ pmull2 v31.1q, v30.2d, v6.2d
+ orr v2.16b, v2.16b, v1.16b
+ eor v0.16b, v0.16b, v31.16b
+ eor v0.16b, v0.16b, v2.16b
+
+.Lcrc32r_final_fold:
+
+ /* Final fold. */
+
+ eor v2.16b, v2.16b, v2.16b /* zero reg */
+ ld1 {v7.16b}, [x9]
+
+ /* reduce 128-bits to 96-bits */
+ ext v6.16b, v6.16b, v6.16b, #8 /* swap high and low parts */
+ mov v1.16b, v0.16b
+ pmull v0.1q, v0.1d, v6.1d
+ ext v6.16b, v5.16b, v5.16b, #8 /* swap high and low parts */
+ ext v1.16b, v1.16b, v2.16b, #8 /* high to low, high zeroed */
+ eor v3.16b, v0.16b, v1.16b
+
+ /* reduce 96-bits to 64-bits */
+ eor v1.16b, v1.16b, v1.16b
+ ext v0.16b, v3.16b, v2.16b, #4 /* [00][00][x2][x1] */
+ mov v1.s[0], v3.s[0] /* [00][00][00][x0] */
+ eor v3.16b, v3.16b, v3.16b
+ pmull v1.1q, v1.1d, v7.1d /* [00][00][xx][xx] */
+ eor v0.16b, v0.16b, v1.16b /* top 64-bit are zero */
+
+ /* barrett reduction */
+ mov v3.s[1], v0.s[0] /* [00][00][x1][00] */
+ ext v0.16b, v2.16b, v0.16b, #12 /* [??][x1][??][00] */
+ pmull v1.1q, v3.1d, v5.1d /* [00][xx][xx][00] */
+ pmull v1.1q, v1.1d, v6.1d /* [00][xx][xx][00] */
+ eor v0.16b, v0.16b, v1.16b
+
+ /* store CRC */
+ st1 {v0.s}[2], [x0]
+
+ ret
+ CFI_ENDPROC()
+ELF(.size _gcry_crc32r_armv8_ce_bulk,.-_gcry_crc32r_armv8_ce_bulk;)
+
+/*
+ * u32 _gcry_crc32r_armv8_ce_reduction_4 (u32 data, u32 crc,
+ *                                        const struct crc32_consts_s *consts);
+ */
+.align 3
+.globl _gcry_crc32r_armv8_ce_reduction_4
+ELF(.type _gcry_crc32r_armv8_ce_reduction_4,%function;)
+_gcry_crc32r_armv8_ce_reduction_4:
+ /* input:
+ * w0: data
+ * w1: crc
+ * x2: crc32 constants
+ */
+ CFI_STARTPROC()
+
+ eor v0.16b, v0.16b, v0.16b
+ add x2, x2, #consts_my_p(0)
+ eor v1.16b, v1.16b, v1.16b
+ ld1 {v5.16b}, [x2]
+
+ mov v0.s[0], w0
+ pmull v0.1q, v0.1d, v5.1d /* [00][00][xx][xx] */
+ mov v1.s[1], w1
+ mov v0.s[2], v0.s[0] /* [00][x0][x1][x0] */
+ pmull2 v0.1q, v0.2d, v5.2d /* [00][00][xx][xx] */
+ eor v0.16b, v0.16b, v1.16b
+
+ mov w0, v0.s[1]
+
+ ret
+ CFI_ENDPROC()
+ELF(.size _gcry_crc32r_armv8_ce_reduction_4,.-_gcry_crc32r_armv8_ce_reduction_4;)
+
+/*
+ * void _gcry_crc32_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+ * const struct crc32_consts_s *consts);
+ */
+.align 3
+.globl _gcry_crc32_armv8_ce_bulk
+ELF(.type _gcry_crc32_armv8_ce_bulk,%function;)
+_gcry_crc32_armv8_ce_bulk:
+ /* input:
+ * x0: pcrc
+ * x1: inbuf
+ * x2: inlen
+ * x3: consts
+ */
+ CFI_STARTPROC()
+
+ GET_DATA_POINTER(x7, .Lcrc32_constants)
+ add x4, x7, #.Lcrc32_bswap_shuf - .Lcrc32_constants
+ cmp x2, #128
+ ld1 {v7.16b}, [x4]
+
+ b.lo .Lcrc32_fold_by_one_setup
+
+ eor v4.16b, v4.16b, v4.16b
+ add x4, x3, #consts_k(1 - 1)
+ ld1 {v4.s}[0], [x0] /* load pcrc */
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* load 64 bytes of input */
+ sub x2, x2, #64
+ ld1 {v6.16b}, [x4]
+ eor v0.16b, v0.16b, v4.16b
+ ext v4.16b, v6.16b, v6.16b, #8
+ tbl v0.16b, { v0.16b }, v7.16b /* byte swap */
+ tbl v1.16b, { v1.16b }, v7.16b /* byte swap */
+ tbl v2.16b, { v2.16b }, v7.16b /* byte swap */
+ tbl v3.16b, { v3.16b }, v7.16b /* byte swap */
+
+ add x4, x3, #consts_k(3 - 1)
+ add x5, x3, #consts_my_p(0)
+
+.Lcrc32_fold_by_four:
+
+ /* Fold by 4. */
+ ld1 {v16.16b-v19.16b}, [x1], #64 /* load 64 bytes of input */
+ sub x2, x2, #64
+ tbl v16.16b, { v16.16b }, v7.16b /* byte swap */
+ tbl v17.16b, { v17.16b }, v7.16b /* byte swap */
+ tbl v18.16b, { v18.16b }, v7.16b /* byte swap */
+ tbl v19.16b, { v19.16b }, v7.16b /* byte swap */
+ cmp x2, #64
+ pmull2 v20.1q, v0.2d, v4.2d
+ pmull2 v21.1q, v1.2d, v4.2d
+ pmull2 v22.1q, v2.2d, v4.2d
+ pmull2 v23.1q, v3.2d, v4.2d
+ pmull v24.1q, v0.1d, v4.1d
+ pmull v25.1q, v1.1d, v4.1d
+ pmull v26.1q, v2.1d, v4.1d
+ pmull v27.1q, v3.1d, v4.1d
+ eor v0.16b, v20.16b, v16.16b
+ eor v1.16b, v21.16b, v17.16b
+ eor v2.16b, v22.16b, v18.16b
+ eor v3.16b, v23.16b, v19.16b
+ eor v0.16b, v0.16b, v24.16b
+ eor v1.16b, v1.16b, v25.16b
+ eor v2.16b, v2.16b, v26.16b
+ eor v3.16b, v3.16b, v27.16b
+ b.hs .Lcrc32_fold_by_four
+
+ ld1 {v6.16b}, [x4]
+ ld1 {v5.16b}, [x5]
+ ext v6.16b, v6.16b, v6.16b, #8
+ ext v5.16b, v5.16b, v5.16b, #8
+
+ cmp x2, #16
+
+ /* Fold 4 to 1. */
+
+ pmull2 v16.1q, v0.2d, v6.2d
+ pmull v4.1q, v0.1d, v6.1d
+ eor v0.16b, v16.16b, v1.16b
+ eor v0.16b, v0.16b, v4.16b
+
+ pmull2 v16.1q, v0.2d, v6.2d
+ pmull v4.1q, v0.1d, v6.1d
+ eor v0.16b, v16.16b, v2.16b
+ eor v0.16b, v0.16b, v4.16b
+
+ pmull2 v16.1q, v0.2d, v6.2d
+ pmull v4.1q, v0.1d, v6.1d
+ eor v0.16b, v16.16b, v3.16b
+ eor v0.16b, v0.16b, v4.16b
+
+ b.lo .Lcrc32_fold_by_one_done
+ b .Lcrc32_fold_by_one
+
+.Lcrc32_fold_by_one_setup:
+
+ eor v1.16b, v1.16b, v1.16b
+ add x4, x3, #consts_k(3 - 1)
+ add x5, x3, #consts_my_p(0)
+ ld1 {v1.s}[0], [x0] /* load pcrc */
+ sub x2, x2, #16
+ ld1 {v0.16b}, [x1], #16 /* load 16 bytes of input */
+ ld1 {v6.16b}, [x4] /* load k3k4 */
+ ld1 {v5.16b}, [x5] /* load my_p */
+ eor v0.16b, v0.16b, v1.16b
+ cmp x2, #16
+ ext v6.16b, v6.16b, v6.16b, #8 /* swap high and low parts */
+ ext v5.16b, v5.16b, v5.16b, #8 /* swap high and low parts */
+ tbl v0.16b, { v0.16b }, v7.16b /* byte swap */
+ b.lo .Lcrc32_fold_by_one_done
+
+.Lcrc32_fold_by_one:
+ sub x2, x2, #16
+ ld1 {v2.16b}, [x1], #16 /* load 16 bytes of input */
+ pmull2 v3.1q, v0.2d, v6.2d
+ tbl v2.16b, { v2.16b }, v7.16b /* byte swap */
+ pmull v1.1q, v0.1d, v6.1d
+ cmp x2, #16
+ eor v0.16b, v3.16b, v2.16b
+ eor v0.16b, v0.16b, v1.16b
+
+ b.hs .Lcrc32_fold_by_one
+
+.Lcrc32_fold_by_one_done:
+
+ cmp x2, #0
+ b.eq .Lcrc32_final_fold
+
+ /* Partial fold. */
+
+ add x4, x7, #.Lcrc32_refl_shuf_shift - .Lcrc32_constants + 32
+ add x5, x7, #.Lcrc32_shuf_shift - .Lcrc32_constants + 16
+ add x6, x7, #.Lcrc32_partial_fold_input_mask - .Lcrc32_constants
+ sub x8, x2, #16
+ sub x4, x4, x2
+ add x5, x5, x2
+ add x6, x6, x2
+ add x8, x1, x8
+
+ /* Load last input and add padding zeros. */
+ ld1 {v4.16b}, [x4]
+ eor x2, x2, x2
+ ld1 {v3.16b}, [x5]
+ ld1 {v2.16b}, [x6]
+ tbl v30.16b, {v0.16b}, v4.16b
+ ld1 {v4.16b}, [x8]
+ tbl v1.16b, {v0.16b}, v3.16b
+ and v2.16b, v2.16b, v4.16b
+
+ pmull2 v0.1q, v30.2d, v6.2d
+ orr v2.16b, v2.16b, v1.16b
+ pmull v1.1q, v30.1d, v6.1d
+ tbl v2.16b, {v2.16b}, v7.16b /* byte swap */
+ eor v0.16b, v0.16b, v1.16b
+ eor v0.16b, v0.16b, v2.16b
+
+.Lcrc32_final_fold:
+
+ /* Final fold. */
+
+ eor v2.16b, v2.16b, v2.16b /* zero reg */
+
+ /* reduce 128-bits to 96-bits */
+ add x4, x3, #consts_k(4)
+ ext v3.16b, v6.16b, v6.16b, #8 /* swap high and low parts */
+ eor v6.16b, v6.16b, v6.16b
+ mov v1.16b, v0.16b
+ pmull2 v0.1q, v0.2d, v3.2d
+ ld1 {v6.d}[1], [x4] /* load k4 */
+ ext v1.16b, v2.16b, v1.16b, #8 /* low to high, low zeroed */
+ eor v3.16b, v0.16b, v1.16b /* bottom 32-bit are zero */
+
+ /* reduce 96-bits to 64-bits */
+ eor v0.16b, v0.16b, v0.16b
+ eor v1.16b, v1.16b, v1.16b
+ mov v0.s[1], v3.s[1] /* [00][00][x1][00] */
+ mov v1.s[2], v3.s[3] /* [00][x3][00][00] */
+ mov v0.s[2], v3.s[2] /* [00][x2][x1][00] */
+ eor v3.16b, v3.16b, v3.16b
+ pmull2 v1.1q, v1.2d, v6.2d /* [00][xx][xx][00] */
+ eor v0.16b, v0.16b, v1.16b /* top and bottom 32-bit are zero */
+
+ /* barrett reduction */
+ mov v3.s[0], v0.s[1] /* [00][00][00][x1] */
+ pmull2 v0.1q, v0.2d, v5.2d /* [00][xx][xx][xx] */
+ ext v0.16b, v0.16b, v2.16b, #4 /* [00][00][xx][xx] */
+ pmull v0.1q, v0.1d, v5.1d
+ eor v0.16b, v0.16b, v3.16b
+
+ /* store CRC in input endian */
+ rev32 v0.8b, v0.8b /* byte swap */
+ st1 {v0.s}[0], [x0]
+
+ ret
+ CFI_ENDPROC()
+ELF(.size _gcry_crc32_armv8_ce_bulk,.-_gcry_crc32_armv8_ce_bulk;)
+
+/*
+ * u32 _gcry_crc32_armv8_ce_reduction_4 (u32 data, u32 crc,
+ *                                       const struct crc32_consts_s *consts);
+ */
+.align 3
+.globl _gcry_crc32_armv8_ce_reduction_4
+ELF(.type _gcry_crc32_armv8_ce_reduction_4,%function;)
+_gcry_crc32_armv8_ce_reduction_4:
+ /* input:
+ * w0: data
+ * w1: crc
+ * x2: crc32 constants
+ */
+ CFI_STARTPROC()
+
+ eor v0.16b, v0.16b, v0.16b
+ add x2, x2, #consts_my_p(0)
+ eor v1.16b, v1.16b, v1.16b
+ ld1 {v5.16b}, [x2]
+
+ mov v0.s[1], w0
+ pmull v0.1q, v0.1d, v5.1d /* [00][xx][xx][00] */
+ mov v1.s[0], w1
+ pmull2 v0.1q, v0.2d, v5.2d /* [00][00][xx][xx] */
+ eor v0.16b, v0.16b, v1.16b
+
+ rev32 v0.8b, v0.8b /* Return in input endian */
+ mov w0, v0.s[0]
+
+ ret
+ CFI_ENDPROC()
+ELF(.size _gcry_crc32_armv8_ce_reduction_4,.-_gcry_crc32_armv8_ce_reduction_4;)
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/crc-armv8-ce.c b/comm/third_party/libgcrypt/cipher/crc-armv8-ce.c
new file mode 100644
index 0000000000..17e5554821
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/crc-armv8-ce.c
@@ -0,0 +1,229 @@
+/* crc-armv8-ce.c - ARMv8-CE PMULL accelerated CRC implementation
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+
+#include "bithelp.h"
+#include "bufhelp.h"
+
+
+#if defined(ENABLE_ARM_CRYPTO_SUPPORT) && defined(__AARCH64EL__) && \
+ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+
+
+#define ALIGNED_16 __attribute__ ((aligned (16)))
+
+
+struct u16_unaligned_s
+{
+ u16 a;
+} __attribute__((packed, aligned (1), may_alias));
+
+struct u32_unaligned_s
+{
+ u32 a;
+} __attribute__((packed, aligned (1), may_alias));
+
+
+/* Constants structure for generic reflected/non-reflected CRC32 PMULL
+ * functions. */
+struct crc32_consts_s
+{
+ /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */
+ u64 k[6];
+ /* my_p: { floor(x^64 / P(x)), P(x) } */
+ u64 my_p[2];
+};
+
+/* PMULL constants for CRC32 and CRC32RFC1510. */
+static const struct crc32_consts_s crc32_consts ALIGNED_16 =
+{
+ { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */
+ U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */
+ U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */
+ U64_C(0x163cd6124), 0 /* y = 2 */
+ },
+ { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */
+ U64_C(0x1f7011641), U64_C(0x1db710641)
+ }
+};
+
+/* PMULL constants for CRC24RFC2440 (polynomial multiplied with x⁸). */
+static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_16 =
+{
+  { /* k[6] = x^(32*y) mod P(x) << 32 */
+ U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */
+ U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */
+ U64_C(0xd9fe8c00) << 32, 0 /* y = 2 */
+ },
+ { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */
+ U64_C(0x1f845fe24), U64_C(0x1864cfb00)
+ }
+};
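+
+/* Sanity note (illustrative only): the second my_p entry of crc32_consts
+   is the familiar reflected CRC-32 polynomial 0xedb88320 carried with its
+   implicit top bit, i.e. 0x1db710641 == (0xedb88320 << 1) | 1.  */
+#if 0
+static void
+example_check_crc32_poly (void)
+{
+  u64 p = (U64_C(0xedb88320) << 1) | 1;
+
+  gcry_assert (p == U64_C(0x1db710641));
+}
+#endif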
+
+
+u32 _gcry_crc32r_armv8_ce_reduction_4 (u32 data, u32 crc,
+ const struct crc32_consts_s *consts);
+void _gcry_crc32r_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+ const struct crc32_consts_s *consts);
+
+u32 _gcry_crc32_armv8_ce_reduction_4 (u32 data, u32 crc,
+ const struct crc32_consts_s *consts);
+void _gcry_crc32_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+ const struct crc32_consts_s *consts);
+
+
+static inline void
+crc32r_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+ const struct crc32_consts_s *consts)
+{
+ u32 crc = *pcrc;
+ u32 data;
+
+ while (inlen >= 4)
+ {
+ data = ((const struct u32_unaligned_s *)inbuf)->a;
+ data ^= crc;
+
+ inlen -= 4;
+ inbuf += 4;
+
+ crc = _gcry_crc32r_armv8_ce_reduction_4 (data, 0, consts);
+ }
+
+ switch (inlen)
+ {
+ case 0:
+ break;
+ case 1:
+ data = inbuf[0];
+ data ^= crc;
+ data <<= 24;
+ crc >>= 8;
+ crc = _gcry_crc32r_armv8_ce_reduction_4 (data, crc, consts);
+ break;
+ case 2:
+ data = ((const struct u16_unaligned_s *)inbuf)->a;
+ data ^= crc;
+ data <<= 16;
+ crc >>= 16;
+ crc = _gcry_crc32r_armv8_ce_reduction_4 (data, crc, consts);
+ break;
+ case 3:
+ data = ((const struct u16_unaligned_s *)inbuf)->a;
+ data |= inbuf[2] << 16;
+ data ^= crc;
+ data <<= 8;
+ crc >>= 24;
+ crc = _gcry_crc32r_armv8_ce_reduction_4 (data, crc, consts);
+ break;
+ }
+
+ *pcrc = crc;
+}
+
+static inline void
+crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+ const struct crc32_consts_s *consts)
+{
+ u32 crc = *pcrc;
+ u32 data;
+
+ while (inlen >= 4)
+ {
+ data = ((const struct u32_unaligned_s *)inbuf)->a;
+ data ^= crc;
+ data = _gcry_bswap32(data);
+
+ inlen -= 4;
+ inbuf += 4;
+
+ crc = _gcry_crc32_armv8_ce_reduction_4 (data, 0, consts);
+ }
+
+ switch (inlen)
+ {
+ case 0:
+ break;
+ case 1:
+ data = inbuf[0];
+ data ^= crc;
+ data = data & 0xffU;
+ crc = _gcry_bswap32(crc >> 8);
+ crc = _gcry_crc32_armv8_ce_reduction_4 (data, crc, consts);
+ break;
+ case 2:
+ data = ((const struct u16_unaligned_s *)inbuf)->a;
+ data ^= crc;
+ data = _gcry_bswap32(data << 16);
+ crc = _gcry_bswap32(crc >> 16);
+ crc = _gcry_crc32_armv8_ce_reduction_4 (data, crc, consts);
+ break;
+ case 3:
+ data = ((const struct u16_unaligned_s *)inbuf)->a;
+ data |= inbuf[2] << 16;
+ data ^= crc;
+ data = _gcry_bswap32(data << 8);
+ crc = crc & 0xff000000U;
+ crc = _gcry_crc32_armv8_ce_reduction_4 (data, crc, consts);
+ break;
+ }
+
+ *pcrc = crc;
+}
+
+void
+_gcry_crc32_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+ const struct crc32_consts_s *consts = &crc32_consts;
+
+ if (!inlen)
+ return;
+
+ if (inlen >= 16)
+ _gcry_crc32r_armv8_ce_bulk (pcrc, inbuf, inlen, consts);
+ else
+ crc32r_less_than_16 (pcrc, inbuf, inlen, consts);
+}
+
+void
+_gcry_crc24rfc2440_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+ const struct crc32_consts_s *consts = &crc24rfc2440_consts;
+
+ if (!inlen)
+ return;
+
+ /* Note: *pcrc in input endian. */
+
+ if (inlen >= 16)
+ _gcry_crc32_armv8_ce_bulk (pcrc, inbuf, inlen, consts);
+ else
+ crc32_less_than_16 (pcrc, inbuf, inlen, consts);
+}
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/crc-intel-pclmul.c b/comm/third_party/libgcrypt/cipher/crc-intel-pclmul.c
new file mode 100644
index 0000000000..8c8b1915ab
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/crc-intel-pclmul.c
@@ -0,0 +1,939 @@
+/* crc-intel-pclmul.c - Intel PCLMUL accelerated CRC implementation
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+
+#include "bithelp.h"
+#include "bufhelp.h"
+
+
+#if defined(ENABLE_PCLMUL_SUPPORT) && defined(ENABLE_SSE41_SUPPORT) && \
+ __GNUC__ >= 4 && \
+ ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+# pragma GCC target("no-sse")
+#endif
+#if __clang__
+# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function)
+#endif
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+
+
+#define ALIGNED_16 __attribute__ ((aligned (16)))
+
+
+struct u16_unaligned_s
+{
+ u16 a;
+} __attribute__((packed, aligned (1), may_alias));
+
+
+/* Constants structure for generic reflected/non-reflected CRC32 CLMUL
+ * functions. */
+struct crc32_consts_s
+{
+ /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */
+ u64 k[6];
+ /* my_p: { floor(x^64 / P(x)), P(x) } */
+ u64 my_p[2];
+};
+
+
+/* CLMUL constants for CRC32 and CRC32RFC1510. */
+static const struct crc32_consts_s crc32_consts ALIGNED_16 =
+{
+ { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */
+ U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */
+ U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */
+ U64_C(0x163cd6124), 0 /* y = 2 */
+ },
+ { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */
+ U64_C(0x1f7011641), U64_C(0x1db710641)
+ }
+};
+
+/* CLMUL constants for CRC24RFC2440 (polynomial multiplied with x⁸). */
+static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_16 =
+{
+  { /* k[6] = x^(32*y) mod P(x) << 32 */
+ U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */
+ U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */
+ U64_C(0xd9fe8c00) << 32, 0 /* y = 2 */
+ },
+ { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */
+ U64_C(0x1f845fe24), U64_C(0x1864cfb00)
+ }
+};
+
+/* Common constants for CRC32 algorithms. */
+static const byte crc32_refl_shuf_shift[3 * 16] ALIGNED_16 =
+ {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ };
+static const byte crc32_shuf_shift[3 * 16] ALIGNED_16 =
+ {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
+ 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ };
+static const byte *crc32_bswap_shuf = &crc32_shuf_shift[16];
+static const byte crc32_partial_fold_input_mask[16 + 16] ALIGNED_16 =
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ };
+static const u64 crc32_merge9to15_shuf[15 - 9 + 1][2] ALIGNED_16 =
+ {
+ { U64_C(0x0706050403020100), U64_C(0xffffffffffffff0f) }, /* 9 */
+ { U64_C(0x0706050403020100), U64_C(0xffffffffffff0f0e) },
+ { U64_C(0x0706050403020100), U64_C(0xffffffffff0f0e0d) },
+ { U64_C(0x0706050403020100), U64_C(0xffffffff0f0e0d0c) },
+ { U64_C(0x0706050403020100), U64_C(0xffffff0f0e0d0c0b) },
+ { U64_C(0x0706050403020100), U64_C(0xffff0f0e0d0c0b0a) },
+ { U64_C(0x0706050403020100), U64_C(0xff0f0e0d0c0b0a09) }, /* 15 */
+ };
+static const u64 crc32_merge5to7_shuf[7 - 5 + 1][2] ALIGNED_16 =
+ {
+ { U64_C(0xffffff0703020100), U64_C(0xffffffffffffffff) }, /* 5 */
+ { U64_C(0xffff070603020100), U64_C(0xffffffffffffffff) },
+ { U64_C(0xff07060503020100), U64_C(0xffffffffffffffff) }, /* 7 */
+ };
+
+/* PCLMUL functions for reflected CRC32. */
+static ASM_FUNC_ATTR_INLINE void
+crc32_reflected_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+ const struct crc32_consts_s *consts)
+{
+ if (inlen >= 8 * 16)
+ {
+ asm volatile ("movd %[crc], %%xmm4\n\t"
+ "movdqu %[inbuf_0], %%xmm0\n\t"
+ "movdqu %[inbuf_1], %%xmm1\n\t"
+ "movdqu %[inbuf_2], %%xmm2\n\t"
+ "movdqu %[inbuf_3], %%xmm3\n\t"
+ "pxor %%xmm4, %%xmm0\n\t"
+ :
+ : [inbuf_0] "m" (inbuf[0 * 16]),
+ [inbuf_1] "m" (inbuf[1 * 16]),
+ [inbuf_2] "m" (inbuf[2 * 16]),
+ [inbuf_3] "m" (inbuf[3 * 16]),
+ [crc] "m" (*pcrc)
+ );
+
+ inbuf += 4 * 16;
+ inlen -= 4 * 16;
+
+ asm volatile ("movdqa %[k1k2], %%xmm4\n\t"
+ :
+ : [k1k2] "m" (consts->k[1 - 1])
+ );
+
+ /* Fold by 4. */
+ while (inlen >= 4 * 16)
+ {
+ asm volatile ("movdqu %[inbuf_0], %%xmm5\n\t"
+ "movdqa %%xmm0, %%xmm6\n\t"
+ "pclmulqdq $0x00, %%xmm4, %%xmm0\n\t"
+ "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm6, %%xmm0\n\t"
+
+ "movdqu %[inbuf_1], %%xmm5\n\t"
+ "movdqa %%xmm1, %%xmm6\n\t"
+ "pclmulqdq $0x00, %%xmm4, %%xmm1\n\t"
+ "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+ "pxor %%xmm6, %%xmm1\n\t"
+
+ "movdqu %[inbuf_2], %%xmm5\n\t"
+ "movdqa %%xmm2, %%xmm6\n\t"
+ "pclmulqdq $0x00, %%xmm4, %%xmm2\n\t"
+ "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+ "pxor %%xmm6, %%xmm2\n\t"
+
+ "movdqu %[inbuf_3], %%xmm5\n\t"
+ "movdqa %%xmm3, %%xmm6\n\t"
+ "pclmulqdq $0x00, %%xmm4, %%xmm3\n\t"
+ "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "pxor %%xmm6, %%xmm3\n\t"
+ :
+ : [inbuf_0] "m" (inbuf[0 * 16]),
+ [inbuf_1] "m" (inbuf[1 * 16]),
+ [inbuf_2] "m" (inbuf[2 * 16]),
+ [inbuf_3] "m" (inbuf[3 * 16])
+ );
+
+ inbuf += 4 * 16;
+ inlen -= 4 * 16;
+ }
+
+ asm volatile ("movdqa %[k3k4], %%xmm6\n\t"
+ "movdqa %[my_p], %%xmm5\n\t"
+ :
+ : [k3k4] "m" (consts->k[3 - 1]),
+ [my_p] "m" (consts->my_p[0])
+ );
+
+ /* Fold 4 to 1. */
+
+ asm volatile ("movdqa %%xmm0, %%xmm4\n\t"
+ "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
+ "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+ "pxor %%xmm4, %%xmm0\n\t"
+
+ "movdqa %%xmm0, %%xmm4\n\t"
+ "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
+ "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t"
+ "pxor %%xmm2, %%xmm0\n\t"
+ "pxor %%xmm4, %%xmm0\n\t"
+
+ "movdqa %%xmm0, %%xmm4\n\t"
+ "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
+ "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t"
+ "pxor %%xmm3, %%xmm0\n\t"
+ "pxor %%xmm4, %%xmm0\n\t"
+ :
+ :
+ );
+ }
+ else
+ {
+ asm volatile ("movd %[crc], %%xmm1\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "movdqa %[k3k4], %%xmm6\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+ "movdqa %[my_p], %%xmm5\n\t"
+ :
+ : [inbuf] "m" (*inbuf),
+ [crc] "m" (*pcrc),
+ [k3k4] "m" (consts->k[3 - 1]),
+ [my_p] "m" (consts->my_p[0])
+ );
+
+ inbuf += 16;
+ inlen -= 16;
+ }
+
+ /* Fold by 1. */
+ if (inlen >= 16)
+ {
+ while (inlen >= 16)
+ {
+ /* Load next block to XMM2. Fold XMM0 to XMM0:XMM1. */
+ asm volatile ("movdqu %[inbuf], %%xmm2\n\t"
+ "movdqa %%xmm0, %%xmm1\n\t"
+ "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
+ "pclmulqdq $0x11, %%xmm6, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+ :
+ : [inbuf] "m" (*inbuf)
+ );
+
+ inbuf += 16;
+ inlen -= 16;
+ }
+ }
+
+ /* Partial fold. */
+ if (inlen)
+ {
+ /* Load last input and add padding zeros. */
+ asm volatile ("movdqu %[shr_shuf], %%xmm3\n\t"
+ "movdqu %[shl_shuf], %%xmm4\n\t"
+ "movdqu %[mask], %%xmm2\n\t"
+
+ "movdqa %%xmm0, %%xmm1\n\t"
+ "pshufb %%xmm4, %%xmm0\n\t"
+ "movdqu %[inbuf], %%xmm4\n\t"
+ "pshufb %%xmm3, %%xmm1\n\t"
+ "pand %%xmm4, %%xmm2\n\t"
+ "por %%xmm1, %%xmm2\n\t"
+
+ "movdqa %%xmm0, %%xmm1\n\t"
+ "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
+ "pclmulqdq $0x11, %%xmm6, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+ :
+ : [inbuf] "m" (*(inbuf - 16 + inlen)),
+ [mask] "m" (crc32_partial_fold_input_mask[inlen]),
+ [shl_shuf] "m" (crc32_refl_shuf_shift[inlen]),
+ [shr_shuf] "m" (crc32_refl_shuf_shift[inlen + 16])
+ );
+
+ inbuf += inlen;
+ inlen -= inlen;
+ }
+
+ /* Final fold. */
+ asm volatile (/* reduce 128-bits to 96-bits */
+ "movdqa %%xmm0, %%xmm1\n\t"
+ "pclmulqdq $0x10, %%xmm6, %%xmm0\n\t"
+ "psrldq $8, %%xmm1\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+
+ /* reduce 96-bits to 64-bits */
+ "pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */
+ "pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* [00][00][x>>64][x>>32] */
+ "pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */
+ "pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */
+
+ /* barrett reduction */
+ "pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */
+ "pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */
+ "pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
+ "pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
+ "pxor %%xmm1, %%xmm0\n\t"
+
+ /* store CRC */
+ "pextrd $2, %%xmm0, %[out]\n\t"
+ : [out] "=m" (*pcrc)
+ : [k5] "m" (consts->k[5 - 1])
+ );
+}
+
+static ASM_FUNC_ATTR_INLINE void
+crc32_reflected_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+ const struct crc32_consts_s *consts)
+{
+ if (inlen < 4)
+ {
+ u32 crc = *pcrc;
+ u32 data;
+
+ asm volatile ("movdqa %[my_p], %%xmm5\n\t"
+ :
+ : [my_p] "m" (consts->my_p[0])
+ );
+
+ if (inlen == 1)
+ {
+ data = inbuf[0];
+ data ^= crc;
+ data <<= 24;
+ crc >>= 8;
+ }
+ else if (inlen == 2)
+ {
+ data = ((const struct u16_unaligned_s *)inbuf)->a;
+ data ^= crc;
+ data <<= 16;
+ crc >>= 16;
+ }
+ else
+ {
+ data = ((const struct u16_unaligned_s *)inbuf)->a;
+ data |= inbuf[2] << 16;
+ data ^= crc;
+ data <<= 8;
+ crc >>= 24;
+ }
+
+ /* Barrett reduction */
+ asm volatile ("movd %[in], %%xmm0\n\t"
+ "movd %[crc], %%xmm1\n\t"
+
+ "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
+ "psllq $32, %%xmm1\n\t"
+ "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */
+ "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
+ "pxor %%xmm1, %%xmm0\n\t"
+
+ "pextrd $1, %%xmm0, %[out]\n\t"
+ : [out] "=m" (*pcrc)
+ : [in] "rm" (data),
+ [crc] "rm" (crc)
+ );
+ }
+ else if (inlen == 4)
+ {
+ /* Barrett reduction */
+ asm volatile ("movd %[crc], %%xmm1\n\t"
+ "movd %[in], %%xmm0\n\t"
+ "movdqa %[my_p], %%xmm5\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+
+ "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
+ "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */
+ "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
+
+ "pextrd $1, %%xmm0, %[out]\n\t"
+ : [out] "=m" (*pcrc)
+ : [in] "m" (*inbuf),
+ [crc] "m" (*pcrc),
+ [my_p] "m" (consts->my_p[0])
+ );
+ }
+ else
+ {
+ asm volatile ("movdqu %[shuf], %%xmm4\n\t"
+ "movd %[crc], %%xmm1\n\t"
+ "movdqa %[my_p], %%xmm5\n\t"
+ "movdqa %[k3k4], %%xmm6\n\t"
+ :
+ : [shuf] "m" (crc32_refl_shuf_shift[inlen]),
+ [crc] "m" (*pcrc),
+ [my_p] "m" (consts->my_p[0]),
+ [k3k4] "m" (consts->k[3 - 1])
+ );
+
+ if (inlen >= 8)
+ {
+ asm volatile ("movq %[inbuf], %%xmm0\n\t"
+ :
+ : [inbuf] "m" (*inbuf)
+ );
+ if (inlen > 8)
+ {
+ asm volatile (/*"pinsrq $1, %[inbuf_tail], %%xmm0\n\t"*/
+ "movq %[inbuf_tail], %%xmm2\n\t"
+ "punpcklqdq %%xmm2, %%xmm0\n\t"
+ "pshufb %[merge_shuf], %%xmm0\n\t"
+ :
+ : [inbuf_tail] "m" (inbuf[inlen - 8]),
+ [merge_shuf] "m"
+ (*crc32_merge9to15_shuf[inlen - 9])
+ );
+ }
+ }
+ else
+ {
+ asm volatile ("movd %[inbuf], %%xmm0\n\t"
+ "pinsrd $1, %[inbuf_tail], %%xmm0\n\t"
+ "pshufb %[merge_shuf], %%xmm0\n\t"
+ :
+ : [inbuf] "m" (*inbuf),
+ [inbuf_tail] "m" (inbuf[inlen - 4]),
+ [merge_shuf] "m"
+ (*crc32_merge5to7_shuf[inlen - 5])
+ );
+ }
+
+ /* Final fold. */
+ asm volatile ("pxor %%xmm1, %%xmm0\n\t"
+ "pshufb %%xmm4, %%xmm0\n\t"
+
+ /* reduce 128-bits to 96-bits */
+ "movdqa %%xmm0, %%xmm1\n\t"
+ "pclmulqdq $0x10, %%xmm6, %%xmm0\n\t"
+ "psrldq $8, %%xmm1\n\t"
+ "pxor %%xmm1, %%xmm0\n\t" /* top 32-bit are zero */
+
+ /* reduce 96-bits to 64-bits */
+ "pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */
+ "pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* [00][00][x>>64][x>>32] */
+ "pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */
+ "pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */
+
+ /* barrett reduction */
+ "pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */
+ "pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */
+ "pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
+ "pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
+ "pxor %%xmm1, %%xmm0\n\t"
+
+ /* store CRC */
+ "pextrd $2, %%xmm0, %[out]\n\t"
+ : [out] "=m" (*pcrc)
+ : [k5] "m" (consts->k[5 - 1])
+ );
+ }
+}
+
+/* PCLMUL functions for non-reflected CRC32. */
+static ASM_FUNC_ATTR_INLINE void
+crc32_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+ const struct crc32_consts_s *consts)
+{
+ asm volatile ("movdqa %[bswap], %%xmm7\n\t"
+ :
+ : [bswap] "m" (*crc32_bswap_shuf)
+ );
+
+ if (inlen >= 8 * 16)
+ {
+ asm volatile ("movd %[crc], %%xmm4\n\t"
+ "movdqu %[inbuf_0], %%xmm0\n\t"
+ "movdqu %[inbuf_1], %%xmm1\n\t"
+ "movdqu %[inbuf_2], %%xmm2\n\t"
+ "pxor %%xmm4, %%xmm0\n\t"
+ "movdqu %[inbuf_3], %%xmm3\n\t"
+ "pshufb %%xmm7, %%xmm0\n\t"
+ "pshufb %%xmm7, %%xmm1\n\t"
+ "pshufb %%xmm7, %%xmm2\n\t"
+ "pshufb %%xmm7, %%xmm3\n\t"
+ :
+ : [inbuf_0] "m" (inbuf[0 * 16]),
+ [inbuf_1] "m" (inbuf[1 * 16]),
+ [inbuf_2] "m" (inbuf[2 * 16]),
+ [inbuf_3] "m" (inbuf[3 * 16]),
+ [crc] "m" (*pcrc)
+ );
+
+ inbuf += 4 * 16;
+ inlen -= 4 * 16;
+
+ asm volatile ("movdqa %[k1k2], %%xmm4\n\t"
+ :
+ : [k1k2] "m" (consts->k[1 - 1])
+ );
+
+ /* Fold by 4. */
+ while (inlen >= 4 * 16)
+ {
+ asm volatile ("movdqu %[inbuf_0], %%xmm5\n\t"
+ "movdqa %%xmm0, %%xmm6\n\t"
+ "pshufb %%xmm7, %%xmm5\n\t"
+ "pclmulqdq $0x01, %%xmm4, %%xmm0\n\t"
+ "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm6, %%xmm0\n\t"
+
+ "movdqu %[inbuf_1], %%xmm5\n\t"
+ "movdqa %%xmm1, %%xmm6\n\t"
+ "pshufb %%xmm7, %%xmm5\n\t"
+ "pclmulqdq $0x01, %%xmm4, %%xmm1\n\t"
+ "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+ "pxor %%xmm6, %%xmm1\n\t"
+
+ "movdqu %[inbuf_2], %%xmm5\n\t"
+ "movdqa %%xmm2, %%xmm6\n\t"
+ "pshufb %%xmm7, %%xmm5\n\t"
+ "pclmulqdq $0x01, %%xmm4, %%xmm2\n\t"
+ "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+ "pxor %%xmm6, %%xmm2\n\t"
+
+ "movdqu %[inbuf_3], %%xmm5\n\t"
+ "movdqa %%xmm3, %%xmm6\n\t"
+ "pshufb %%xmm7, %%xmm5\n\t"
+ "pclmulqdq $0x01, %%xmm4, %%xmm3\n\t"
+ "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "pxor %%xmm6, %%xmm3\n\t"
+ :
+ : [inbuf_0] "m" (inbuf[0 * 16]),
+ [inbuf_1] "m" (inbuf[1 * 16]),
+ [inbuf_2] "m" (inbuf[2 * 16]),
+ [inbuf_3] "m" (inbuf[3 * 16])
+ );
+
+ inbuf += 4 * 16;
+ inlen -= 4 * 16;
+ }
+
+ asm volatile ("movdqa %[k3k4], %%xmm6\n\t"
+ "movdqa %[my_p], %%xmm5\n\t"
+ :
+ : [k3k4] "m" (consts->k[3 - 1]),
+ [my_p] "m" (consts->my_p[0])
+ );
+
+ /* Fold 4 to 1. */
+
+ asm volatile ("movdqa %%xmm0, %%xmm4\n\t"
+ "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
+ "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+ "pxor %%xmm4, %%xmm0\n\t"
+
+ "movdqa %%xmm0, %%xmm4\n\t"
+ "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
+ "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t"
+ "pxor %%xmm2, %%xmm0\n\t"
+ "pxor %%xmm4, %%xmm0\n\t"
+
+ "movdqa %%xmm0, %%xmm4\n\t"
+ "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
+ "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t"
+ "pxor %%xmm3, %%xmm0\n\t"
+ "pxor %%xmm4, %%xmm0\n\t"
+ :
+ :
+ );
+ }
+ else
+ {
+ asm volatile ("movd %[crc], %%xmm1\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "movdqa %[k3k4], %%xmm6\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+ "movdqa %[my_p], %%xmm5\n\t"
+ "pshufb %%xmm7, %%xmm0\n\t"
+ :
+ : [inbuf] "m" (*inbuf),
+ [crc] "m" (*pcrc),
+ [k3k4] "m" (consts->k[3 - 1]),
+ [my_p] "m" (consts->my_p[0])
+ );
+
+ inbuf += 16;
+ inlen -= 16;
+ }
+
+ /* Fold by 1. */
+ if (inlen >= 16)
+ {
+ while (inlen >= 16)
+ {
+ /* Load next block to XMM2. Fold XMM0 to XMM0:XMM1. */
+ asm volatile ("movdqu %[inbuf], %%xmm2\n\t"
+ "movdqa %%xmm0, %%xmm1\n\t"
+ "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
+ "pshufb %%xmm7, %%xmm2\n\t"
+ "pclmulqdq $0x10, %%xmm6, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+ :
+ : [inbuf] "m" (*inbuf)
+ );
+
+ inbuf += 16;
+ inlen -= 16;
+ }
+ }
+
+ /* Partial fold. */
+ if (inlen)
+ {
+ /* Load last input and add padding zeros. */
+ asm volatile ("movdqu %[shl_shuf], %%xmm4\n\t"
+ "movdqu %[shr_shuf], %%xmm3\n\t"
+ "movdqu %[mask], %%xmm2\n\t"
+
+ "movdqa %%xmm0, %%xmm1\n\t"
+ "pshufb %%xmm4, %%xmm0\n\t"
+ "movdqu %[inbuf], %%xmm4\n\t"
+ "pshufb %%xmm3, %%xmm1\n\t"
+ "pand %%xmm4, %%xmm2\n\t"
+ "por %%xmm1, %%xmm2\n\t"
+
+ "pshufb %%xmm7, %%xmm2\n\t"
+
+ "movdqa %%xmm0, %%xmm1\n\t"
+ "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
+ "pclmulqdq $0x10, %%xmm6, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+ :
+ : [inbuf] "m" (*(inbuf - 16 + inlen)),
+ [mask] "m" (crc32_partial_fold_input_mask[inlen]),
+ [shl_shuf] "m" (crc32_refl_shuf_shift[32 - inlen]),
+ [shr_shuf] "m" (crc32_shuf_shift[inlen + 16])
+ );
+
+ inbuf += inlen;
+ inlen -= inlen;
+ }
+
+ /* Final fold. */
+ asm volatile (/* reduce 128-bits to 96-bits */
+ "movdqa %%xmm0, %%xmm1\n\t"
+ "pclmulqdq $0x11, %%xmm6, %%xmm0\n\t"
+ "pslldq $8, %%xmm1\n\t"
+ "pxor %%xmm1, %%xmm0\n\t" /* bottom 32-bit are zero */
+
+ /* reduce 96-bits to 64-bits */
+ "pshufd $0x30, %%xmm0, %%xmm1\n\t" /* [00][x>>96][00][00] */
+ "pshufd $0x24, %%xmm0, %%xmm0\n\t" /* [00][xx][xx][00] */
+ "pclmulqdq $0x01, %[k5], %%xmm1\n\t" /* [00][xx][xx][00] */
+ "pxor %%xmm1, %%xmm0\n\t" /* top and bottom 32-bit are zero */
+
+ /* barrett reduction */
+ "pshufd $0x01, %%xmm0, %%xmm1\n\t" /* [00][00][00][x>>32] */
+ "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][xx] */
+ "psrldq $4, %%xmm0\n\t" /* [00][00][xx][xx] */
+ "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+
+ /* store CRC in input endian */
+ "movd %%xmm0, %%eax\n\t"
+ "bswapl %%eax\n\t"
+ "movl %%eax, %[out]\n\t"
+ : [out] "=m" (*pcrc)
+ : [k5] "m" (consts->k[5 - 1])
+ : "eax" );
+}
+
+static ASM_FUNC_ATTR_INLINE void
+crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+ const struct crc32_consts_s *consts)
+{
+ if (inlen < 4)
+ {
+ u32 crc = *pcrc;
+ u32 data;
+
+ asm volatile ("movdqa %[my_p], %%xmm5\n\t"
+ :
+ : [my_p] "m" (consts->my_p[0])
+ );
+
+ if (inlen == 1)
+ {
+ data = inbuf[0];
+ data ^= crc;
+ data = _gcry_bswap32(data << 24);
+ crc = _gcry_bswap32(crc >> 8);
+ }
+ else if (inlen == 2)
+ {
+ data = ((const struct u16_unaligned_s *)inbuf)->a;
+ data ^= crc;
+ data = _gcry_bswap32(data << 16);
+ crc = _gcry_bswap32(crc >> 16);
+ }
+ else
+ {
+ data = ((const struct u16_unaligned_s *)inbuf)->a;
+ data |= inbuf[2] << 16;
+ data ^= crc;
+ data = _gcry_bswap32(data << 8);
+ crc = _gcry_bswap32(crc >> 24);
+ }
+
+ /* Barrett reduction */
+ asm volatile ("movd %[in], %%xmm0\n\t"
+ "psllq $32, %%xmm0\n\t" /* [00][00][xx][00] */
+ "movd %[crc], %%xmm1\n\t"
+
+ "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][00] */
+ "pclmulqdq $0x11, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
+ "pxor %%xmm1, %%xmm0\n\t"
+
+ /* store CRC in input endian */
+ "movd %%xmm0, %%eax\n\t"
+ "bswapl %%eax\n\t"
+ "movl %%eax, %[out]\n\t"
+ : [out] "=m" (*pcrc)
+ : [in] "r" (data),
+ [crc] "r" (crc)
+ : "eax" );
+ }
+ else if (inlen == 4)
+ {
+ /* Barrett reduction */
+ asm volatile ("movd %[crc], %%xmm0\n\t"
+ "movd %[in], %%xmm1\n\t"
+ "movdqa %[my_p], %%xmm5\n\t"
+ :
+ : [in] "m" (*inbuf),
+ [crc] "m" (*pcrc),
+ [my_p] "m" (consts->my_p[0])
+ : "cc" );
+
+ asm volatile ("pxor %%xmm1, %%xmm0\n\t"
+ "pshufb %[bswap], %%xmm0\n\t" /* [xx][00][00][00] */
+
+ "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][00] */
+ "pclmulqdq $0x11, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
+ :
+ : [bswap] "m" (*crc32_bswap_shuf)
+ : "cc" );
+
+ asm volatile (/* store CRC in input endian */
+ "movd %%xmm0, %%eax\n\t"
+ "bswapl %%eax\n\t"
+ "movl %%eax, %[out]\n\t"
+ : [out] "=m" (*pcrc)
+ :
+ : "eax", "cc" );
+ }
+ else
+ {
+ asm volatile ("movdqu %[shuf], %%xmm7\n\t"
+ "movd %[crc], %%xmm1\n\t"
+ "movdqa %[my_p], %%xmm5\n\t"
+ "movdqa %[k3k4], %%xmm6\n\t"
+ :
+ : [shuf] "m" (crc32_shuf_shift[32 - inlen]),
+ [crc] "m" (*pcrc),
+ [my_p] "m" (consts->my_p[0]),
+ [k3k4] "m" (consts->k[3 - 1])
+ );
+
+ if (inlen >= 8)
+ {
+ asm volatile ("movq %[inbuf], %%xmm0\n\t"
+ :
+ : [inbuf] "m" (*inbuf)
+ );
+ if (inlen > 8)
+ {
+ asm volatile (/*"pinsrq $1, %[inbuf_tail], %%xmm0\n\t"*/
+ "movq %[inbuf_tail], %%xmm2\n\t"
+ "punpcklqdq %%xmm2, %%xmm0\n\t"
+ "pshufb %[merge_shuf], %%xmm0\n\t"
+ :
+ : [inbuf_tail] "m" (inbuf[inlen - 8]),
+ [merge_shuf] "m"
+ (*crc32_merge9to15_shuf[inlen - 9])
+ );
+ }
+ }
+ else
+ {
+ asm volatile ("movd %[inbuf], %%xmm0\n\t"
+ "pinsrd $1, %[inbuf_tail], %%xmm0\n\t"
+ "pshufb %[merge_shuf], %%xmm0\n\t"
+ :
+ : [inbuf] "m" (*inbuf),
+ [inbuf_tail] "m" (inbuf[inlen - 4]),
+ [merge_shuf] "m"
+ (*crc32_merge5to7_shuf[inlen - 5])
+ );
+ }
+
+ /* Final fold. */
+ asm volatile ("pxor %%xmm1, %%xmm0\n\t"
+ "pshufb %%xmm7, %%xmm0\n\t"
+
+ /* reduce 128-bits to 96-bits */
+ "movdqa %%xmm0, %%xmm1\n\t"
+ "pclmulqdq $0x11, %%xmm6, %%xmm0\n\t"
+ "pslldq $8, %%xmm1\n\t"
+ "pxor %%xmm1, %%xmm0\n\t" /* bottom 32-bit are zero */
+
+ /* reduce 96-bits to 64-bits */
+ "pshufd $0x30, %%xmm0, %%xmm1\n\t" /* [00][x>>96][00][00] */
+ "pshufd $0x24, %%xmm0, %%xmm0\n\t" /* [00][xx][xx][00] */
+ "pclmulqdq $0x01, %[k5], %%xmm1\n\t" /* [00][xx][xx][00] */
+ "pxor %%xmm1, %%xmm0\n\t" /* top and bottom 32-bit are zero */
+
+ /* barrett reduction */
+ "pshufd $0x01, %%xmm0, %%xmm1\n\t" /* [00][00][00][x>>32] */
+ "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][xx] */
+ "psrldq $4, %%xmm0\n\t" /* [00][00][xx][xx] */
+ "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+
+ /* store CRC in input endian */
+ "movd %%xmm0, %%eax\n\t"
+ "bswapl %%eax\n\t"
+ "movl %%eax, %[out]\n\t"
+ : [out] "=m" (*pcrc)
+ : [k5] "m" (consts->k[5 - 1])
+ : "eax" );
+ }
+}
+
+void ASM_FUNC_ATTR
+_gcry_crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+ const struct crc32_consts_s *consts = &crc32_consts;
+#if defined(__x86_64__) && defined(__WIN64__)
+ char win64tmp[2 * 16];
+
+ /* XMM6-XMM7 need to be restored after use. */
+ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
+ "movdqu %%xmm7, 1*16(%0)\n\t"
+ :
+ : "r" (win64tmp)
+ : "memory");
+#endif
+
+ if (!inlen)
+ return;
+
+ if (inlen >= 16)
+ crc32_reflected_bulk(pcrc, inbuf, inlen, consts);
+ else
+ crc32_reflected_less_than_16(pcrc, inbuf, inlen, consts);
+
+#if defined(__x86_64__) && defined(__WIN64__)
+ /* Restore used registers. */
+ asm volatile("movdqu 0*16(%0), %%xmm6\n\t"
+ "movdqu 1*16(%0), %%xmm7\n\t"
+ :
+ : "r" (win64tmp)
+ : "memory");
+#endif
+}
+
+void ASM_FUNC_ATTR
+_gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+ const struct crc32_consts_s *consts = &crc24rfc2440_consts;
+#if defined(__x86_64__) && defined(__WIN64__)
+ char win64tmp[2 * 16];
+
+ /* XMM6-XMM7 need to be restored after use. */
+ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
+ "movdqu %%xmm7, 1*16(%0)\n\t"
+ :
+ : "r" (win64tmp)
+ : "memory");
+#endif
+
+ if (!inlen)
+ return;
+
+ /* Note: *pcrc in input endian. */
+
+ if (inlen >= 16)
+ crc32_bulk(pcrc, inbuf, inlen, consts);
+ else
+ crc32_less_than_16(pcrc, inbuf, inlen, consts);
+
+#if defined(__x86_64__) && defined(__WIN64__)
+ /* Restore used registers. */
+ asm volatile("movdqu 0*16(%0), %%xmm6\n\t"
+ "movdqu 1*16(%0), %%xmm7\n\t"
+ :
+ : "r" (win64tmp)
+ : "memory");
+#endif
+}
+
+#if __clang__
+# pragma clang attribute pop
+#endif
+
+#endif /* USE_INTEL_PCLMUL */
diff --git a/comm/third_party/libgcrypt/cipher/crc-ppc.c b/comm/third_party/libgcrypt/cipher/crc-ppc.c
new file mode 100644
index 0000000000..b9a40130ce
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/crc-ppc.c
@@ -0,0 +1,656 @@
+/* crc-ppc.c - POWER8 vpmsum accelerated CRC implementation
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+
+#include "bithelp.h"
+#include "bufhelp.h"
+
+
+#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \
+ defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
+ __GNUC__ >= 4
+
+#include <altivec.h>
+#include "bufhelp.h"
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+#define ALIGNED_64 __attribute__ ((aligned (64)))
+
+
+typedef vector unsigned char vector16x_u8;
+typedef vector unsigned int vector4x_u32;
+typedef vector unsigned long long vector2x_u64;
+
+
+/* Constants structure for generic reflected/non-reflected CRC32 PMULL
+ * functions. */
+struct crc32_consts_s
+{
+ /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */
+ unsigned long long k[6];
+ /* my_p: { floor(x^64 / P(x)), P(x) } */
+ unsigned long long my_p[2];
+};
+
+/* PMULL constants for CRC32 and CRC32RFC1510. */
+static const struct crc32_consts_s crc32_consts ALIGNED_64 =
+{
+ { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */
+ U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */
+ U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */
+ U64_C(0x163cd6124), 0 /* y = 2 */
+ },
+ { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */
+ U64_C(0x1f7011641), U64_C(0x1db710641)
+ }
+};
+
+/* PMULL constants for CRC24RFC2440 (polynomial multiplied by x⁸). */
+static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_64 =
+{
+  { /* k[6] = x^(32*y) mod P(x) << 32 */
+ U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */
+ U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */
+ U64_C(0xd9fe8c00) << 32, 0 /* y = 2 */
+ },
+ { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */
+ U64_C(0x1f845fe24), U64_C(0x1864cfb00)
+ }
+};
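+
+/* Illustrative sketch, not part of the original libgcrypt source: the
+ * folding constants above are powers of x reduced modulo the CRC
+ * polynomial over GF(2).  A generic (and slow) way to derive such a
+ * constant is to shift in one bit at a time and subtract P(x) whenever
+ * the degree reaches 32.  The hypothetical helper below assumes the
+ * polynomial is passed as the 33-bit value "(1 << 32) | poly"; for the
+ * reflected CRC32 table the result would additionally be passed through
+ * reverse_33bits(), as the comments above note. */
+static u64
+gf2_pow_x_mod_p (unsigned int n, u64 poly33)
+{
+  u64 r = 1;  /* x^0 */
+
+  while (n--)
+    {
+      r <<= 1;                    /* multiply by x */
+      if (r & ((u64)1 << 32))
+        r ^= poly33;              /* reduce modulo P(x) */
+    }
+
+  return r;                       /* x^n mod P(x), degree <= 31 */
+}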
+
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vpmsumd(vector2x_u64 a, vector2x_u64 b)
+{
+ __asm__("vpmsumd %0, %1, %2"
+ : "=v" (a)
+ : "v" (a), "v" (b));
+ return a;
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_swap_u64(vector2x_u64 a)
+{
+ __asm__("xxswapd %x0, %x1"
+ : "=wa" (a)
+ : "wa" (a));
+ return a;
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_sld_u32(vector4x_u32 a, vector4x_u32 b, unsigned int idx)
+{
+ return vec_sld (a, b, (4 * idx) & 15);
+}
+
+
+static const byte crc32_partial_fold_input_mask[16 + 16] ALIGNED_64 =
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ };
+static const byte crc32_shuf_shift[3 * 16] ALIGNED_64 =
+ {
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
+ 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ };
+static const byte crc32_refl_shuf_shift[3 * 16] ALIGNED_64 =
+ {
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ };
+static const vector16x_u8 bswap_const ALIGNED_64 =
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+
+
+#define CRC_VEC_SWAP(v) ({ vector2x_u64 __vecu64 = (v); \
+ vec_perm(__vecu64, __vecu64, bswap_const); })
+
+#ifdef WORDS_BIGENDIAN
+# define CRC_VEC_U64_DEF(lo, hi) { (hi), (lo) }
+# define CRC_VEC_U64_LOAD(offs, ptr) \
+ asm_swap_u64(asm_vec_u64_load(offs, ptr))
+# define CRC_VEC_U64_LOAD_LE(offs, ptr) \
+ CRC_VEC_SWAP(asm_vec_u64_load(offs, ptr))
+# define CRC_VEC_U64_LOAD_BE(offs, ptr) \
+ asm_vec_u64_load(offs, ptr)
+# define CRC_VEC_SWAP_TO_LE(v) CRC_VEC_SWAP(v)
+# define CRC_VEC_SWAP_TO_BE(v) (v)
+# define VEC_U64_LO 1
+# define VEC_U64_HI 0
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vec_u64_load(unsigned long offset, const void *ptr)
+{
+ vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+ if (__builtin_constant_p (offset) && offset == 0)
+ __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
+ : "=wa" (vecu64)
+ : "r" ((uintptr_t)ptr)
+ : "memory");
+ else
+#endif
+ __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
+ : "=wa" (vecu64)
+ : "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+ return vecu64;
+}
+#else
+# define CRC_VEC_U64_DEF(lo, hi) { (lo), (hi) }
+# define CRC_VEC_U64_LOAD(offs, ptr) asm_vec_u64_load_le(offs, ptr)
+# define CRC_VEC_U64_LOAD_LE(offs, ptr) asm_vec_u64_load_le(offs, ptr)
+# define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr)
+# define CRC_VEC_SWAP_TO_LE(v) (v)
+# define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v)
+# define VEC_U64_LO 0
+# define VEC_U64_HI 1
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vec_u64_load_le(unsigned long offset, const void *ptr)
+{
+ vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+ if (__builtin_constant_p (offset) && offset == 0)
+ __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
+ : "=wa" (vecu64)
+ : "r" ((uintptr_t)ptr)
+ : "memory");
+ else
+#endif
+ __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
+ : "=wa" (vecu64)
+ : "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+ return asm_swap_u64(vecu64);
+}
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vec_u64_load_be(unsigned int offset, const void *ptr)
+{
+ static const vector16x_u8 vec_load_le_const =
+ { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 };
+ vector2x_u64 vecu64;
+
+#if __GNUC__ >= 4
+ if (__builtin_constant_p (offset) && offset == 0)
+ __asm__ ("lxvd2x %%vs32,0,%1\n\t"
+ "vperm %0,%%v0,%%v0,%2\n\t"
+ : "=v" (vecu64)
+ : "r" ((uintptr_t)(ptr)), "v" (vec_load_le_const)
+ : "memory", "v0");
+#endif
+ else
+ __asm__ ("lxvd2x %%vs32,%1,%2\n\t"
+ "vperm %0,%%v0,%%v0,%3\n\t"
+ : "=v" (vecu64)
+ : "r" (offset), "r" ((uintptr_t)(ptr)),
+ "v" (vec_load_le_const)
+ : "memory", "r0", "v0");
+
+ return vecu64;
+}
+#endif
+
+
+static ASM_FUNC_ATTR_INLINE void
+crc32r_ppc8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+ const struct crc32_consts_s *consts)
+{
+ vector4x_u32 zero = { 0, 0, 0, 0 };
+ vector2x_u64 low_64bit_mask = CRC_VEC_U64_DEF((u64)-1, 0);
+ vector2x_u64 low_32bit_mask = CRC_VEC_U64_DEF((u32)-1, 0);
+ vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);
+ vector2x_u64 k1k2 = CRC_VEC_U64_LOAD(0, &consts->k[1 - 1]);
+ vector2x_u64 k3k4 = CRC_VEC_U64_LOAD(0, &consts->k[3 - 1]);
+ vector2x_u64 k4lo = CRC_VEC_U64_DEF(k3k4[VEC_U64_HI], 0);
+ vector2x_u64 k5lo = CRC_VEC_U64_LOAD(0, &consts->k[5 - 1]);
+ vector2x_u64 crc = CRC_VEC_U64_DEF(*pcrc, 0);
+ vector2x_u64 crc0, crc1, crc2, crc3;
+ vector2x_u64 v0;
+
+ if (inlen >= 8 * 16)
+ {
+ crc0 = CRC_VEC_U64_LOAD_LE(0 * 16, inbuf);
+ crc0 ^= crc;
+ crc1 = CRC_VEC_U64_LOAD_LE(1 * 16, inbuf);
+ crc2 = CRC_VEC_U64_LOAD_LE(2 * 16, inbuf);
+ crc3 = CRC_VEC_U64_LOAD_LE(3 * 16, inbuf);
+
+ inbuf += 4 * 16;
+ inlen -= 4 * 16;
+
+ /* Fold by 4. */
+ while (inlen >= 4 * 16)
+ {
+ v0 = CRC_VEC_U64_LOAD_LE(0 * 16, inbuf);
+ crc0 = asm_vpmsumd(crc0, k1k2) ^ v0;
+
+ v0 = CRC_VEC_U64_LOAD_LE(1 * 16, inbuf);
+ crc1 = asm_vpmsumd(crc1, k1k2) ^ v0;
+
+ v0 = CRC_VEC_U64_LOAD_LE(2 * 16, inbuf);
+ crc2 = asm_vpmsumd(crc2, k1k2) ^ v0;
+
+ v0 = CRC_VEC_U64_LOAD_LE(3 * 16, inbuf);
+ crc3 = asm_vpmsumd(crc3, k1k2) ^ v0;
+
+ inbuf += 4 * 16;
+ inlen -= 4 * 16;
+ }
+
+ /* Fold 4 to 1. */
+ crc1 ^= asm_vpmsumd(crc0, k3k4);
+ crc2 ^= asm_vpmsumd(crc1, k3k4);
+ crc3 ^= asm_vpmsumd(crc2, k3k4);
+ crc = crc3;
+ }
+ else
+ {
+ v0 = CRC_VEC_U64_LOAD_LE(0, inbuf);
+ crc ^= v0;
+
+ inbuf += 16;
+ inlen -= 16;
+ }
+
+ /* Fold by 1. */
+ while (inlen >= 16)
+ {
+ v0 = CRC_VEC_U64_LOAD_LE(0, inbuf);
+ crc = asm_vpmsumd(k3k4, crc);
+ crc ^= v0;
+
+ inbuf += 16;
+ inlen -= 16;
+ }
+
+ /* Partial fold. */
+ if (inlen)
+ {
+ /* Load last input and add padding zeros. */
+ vector2x_u64 mask = CRC_VEC_U64_LOAD_LE(inlen, crc32_partial_fold_input_mask);
+ vector2x_u64 shl_shuf = CRC_VEC_U64_LOAD_LE(inlen, crc32_refl_shuf_shift);
+ vector2x_u64 shr_shuf = CRC_VEC_U64_LOAD_LE(inlen + 16, crc32_refl_shuf_shift);
+
+ v0 = CRC_VEC_U64_LOAD_LE(inlen - 16, inbuf);
+ v0 &= mask;
+
+ crc = CRC_VEC_SWAP_TO_LE(crc);
+ v0 |= (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
+ (vector16x_u8)shr_shuf);
+ crc = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
+ (vector16x_u8)shl_shuf);
+ crc = asm_vpmsumd(k3k4, crc);
+ crc ^= v0;
+
+ inbuf += inlen;
+ inlen -= inlen;
+ }
+
+ /* Final fold. */
+
+ /* reduce 128-bits to 96-bits */
+ v0 = asm_swap_u64(crc);
+ v0 &= low_64bit_mask;
+ crc = asm_vpmsumd(k4lo, crc);
+ crc ^= v0;
+
+ /* reduce 96-bits to 64-bits */
+ v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
+ (vector4x_u32)crc, 3); /* [x0][x3][x2][x1] */
+ v0 &= low_64bit_mask; /* [00][00][x2][x1] */
+ crc = crc & low_32bit_mask; /* [00][00][00][x0] */
+ crc = v0 ^ asm_vpmsumd(k5lo, crc); /* [00][00][xx][xx] */
+
+ /* barrett reduction */
+ v0 = crc << 32; /* [00][00][x0][00] */
+ v0 = asm_vpmsumd(my_p, v0);
+ v0 = asm_swap_u64(v0);
+ v0 = asm_vpmsumd(my_p, v0);
+ crc = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
+ zero, 1); /* [00][x1][x0][00] */
+ crc ^= v0;
+
+ *pcrc = (u32)crc[VEC_U64_HI];
+}
+
+
+static ASM_FUNC_ATTR_INLINE u32
+crc32r_ppc8_ce_reduction_4 (u32 data, u32 crc,
+ const struct crc32_consts_s *consts)
+{
+ vector4x_u32 zero = { 0, 0, 0, 0 };
+ vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);
+ vector2x_u64 v0 = CRC_VEC_U64_DEF((u64)data, 0);
+ v0 = asm_vpmsumd(v0, my_p); /* [00][00][xx][xx] */
+ v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)v0,
+ zero, 3); /* [x0][00][00][00] */
+ v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)v0,
+ (vector4x_u32)v0, 3); /* [00][x0][00][00] */
+ v0 = asm_vpmsumd(v0, my_p); /* [00][00][xx][xx] */
+ return (v0[VEC_U64_LO] >> 32) ^ crc;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+crc32r_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+ const struct crc32_consts_s *consts)
+{
+ u32 crc = *pcrc;
+ u32 data;
+
+ while (inlen >= 4)
+ {
+ data = buf_get_le32(inbuf);
+ data ^= crc;
+
+ inlen -= 4;
+ inbuf += 4;
+
+ crc = crc32r_ppc8_ce_reduction_4 (data, 0, consts);
+ }
+
+ switch (inlen)
+ {
+ case 0:
+ break;
+ case 1:
+ data = inbuf[0];
+ data ^= crc;
+ data <<= 24;
+ crc >>= 8;
+ crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts);
+ break;
+ case 2:
+ data = inbuf[0] << 0;
+ data |= inbuf[1] << 8;
+ data ^= crc;
+ data <<= 16;
+ crc >>= 16;
+ crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts);
+ break;
+ case 3:
+ data = inbuf[0] << 0;
+ data |= inbuf[1] << 8;
+ data |= inbuf[2] << 16;
+ data ^= crc;
+ data <<= 8;
+ crc >>= 24;
+ crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts);
+ break;
+ }
+
+ *pcrc = crc;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+crc32_ppc8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+ const struct crc32_consts_s *consts)
+{
+ vector4x_u32 zero = { 0, 0, 0, 0 };
+ vector2x_u64 low_96bit_mask = CRC_VEC_U64_DEF(~0, ~((u64)(u32)-1 << 32));
+ vector2x_u64 p_my = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->my_p[0]));
+ vector2x_u64 p_my_lo, p_my_hi;
+ vector2x_u64 k2k1 = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->k[1 - 1]));
+ vector2x_u64 k4k3 = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->k[3 - 1]));
+ vector2x_u64 k4hi = CRC_VEC_U64_DEF(0, consts->k[4 - 1]);
+ vector2x_u64 k5hi = CRC_VEC_U64_DEF(0, consts->k[5 - 1]);
+ vector2x_u64 crc = CRC_VEC_U64_DEF(0, _gcry_bswap64(*pcrc));
+ vector2x_u64 crc0, crc1, crc2, crc3;
+ vector2x_u64 v0;
+
+ if (inlen >= 8 * 16)
+ {
+ crc0 = CRC_VEC_U64_LOAD_BE(0 * 16, inbuf);
+ crc0 ^= crc;
+ crc1 = CRC_VEC_U64_LOAD_BE(1 * 16, inbuf);
+ crc2 = CRC_VEC_U64_LOAD_BE(2 * 16, inbuf);
+ crc3 = CRC_VEC_U64_LOAD_BE(3 * 16, inbuf);
+
+ inbuf += 4 * 16;
+ inlen -= 4 * 16;
+
+ /* Fold by 4. */
+ while (inlen >= 4 * 16)
+ {
+ v0 = CRC_VEC_U64_LOAD_BE(0 * 16, inbuf);
+ crc0 = asm_vpmsumd(crc0, k2k1) ^ v0;
+
+ v0 = CRC_VEC_U64_LOAD_BE(1 * 16, inbuf);
+ crc1 = asm_vpmsumd(crc1, k2k1) ^ v0;
+
+ v0 = CRC_VEC_U64_LOAD_BE(2 * 16, inbuf);
+ crc2 = asm_vpmsumd(crc2, k2k1) ^ v0;
+
+ v0 = CRC_VEC_U64_LOAD_BE(3 * 16, inbuf);
+ crc3 = asm_vpmsumd(crc3, k2k1) ^ v0;
+
+ inbuf += 4 * 16;
+ inlen -= 4 * 16;
+ }
+
+ /* Fold 4 to 1. */
+ crc1 ^= asm_vpmsumd(crc0, k4k3);
+ crc2 ^= asm_vpmsumd(crc1, k4k3);
+ crc3 ^= asm_vpmsumd(crc2, k4k3);
+ crc = crc3;
+ }
+ else
+ {
+ v0 = CRC_VEC_U64_LOAD_BE(0, inbuf);
+ crc ^= v0;
+
+ inbuf += 16;
+ inlen -= 16;
+ }
+
+ /* Fold by 1. */
+ while (inlen >= 16)
+ {
+ v0 = CRC_VEC_U64_LOAD_BE(0, inbuf);
+ crc = asm_vpmsumd(k4k3, crc);
+ crc ^= v0;
+
+ inbuf += 16;
+ inlen -= 16;
+ }
+
+ /* Partial fold. */
+ if (inlen)
+ {
+ /* Load last input and add padding zeros. */
+ vector2x_u64 mask = CRC_VEC_U64_LOAD_LE(inlen, crc32_partial_fold_input_mask);
+ vector2x_u64 shl_shuf = CRC_VEC_U64_LOAD_LE(32 - inlen, crc32_refl_shuf_shift);
+ vector2x_u64 shr_shuf = CRC_VEC_U64_LOAD_LE(inlen + 16, crc32_shuf_shift);
+
+ v0 = CRC_VEC_U64_LOAD_LE(inlen - 16, inbuf);
+ v0 &= mask;
+
+ crc = CRC_VEC_SWAP_TO_LE(crc);
+ crc2 = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
+ (vector16x_u8)shr_shuf);
+ v0 |= crc2;
+ v0 = CRC_VEC_SWAP(v0);
+ crc = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
+ (vector16x_u8)shl_shuf);
+ crc = asm_vpmsumd(k4k3, crc);
+ crc ^= v0;
+
+ inbuf += inlen;
+ inlen -= inlen;
+ }
+
+ /* Final fold. */
+
+ /* reduce 128-bits to 96-bits */
+ v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
+ (vector4x_u32)zero, 2);
+ crc = asm_vpmsumd(k4hi, crc);
+ crc ^= v0; /* bottom 32-bit are zero */
+
+ /* reduce 96-bits to 64-bits */
+ v0 = crc & low_96bit_mask; /* [00][x2][x1][00] */
+ crc >>= 32; /* [00][x3][00][x0] */
+ crc = asm_vpmsumd(k5hi, crc); /* [00][xx][xx][00] */
+ crc ^= v0; /* top and bottom 32-bit are zero */
+
+ /* barrett reduction */
+ p_my_hi = p_my;
+ p_my_lo = p_my;
+ p_my_hi[VEC_U64_LO] = 0;
+ p_my_lo[VEC_U64_HI] = 0;
+ v0 = crc >> 32; /* [00][00][00][x1] */
+ crc = asm_vpmsumd(p_my_hi, crc); /* [00][xx][xx][xx] */
+ crc = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
+ (vector4x_u32)crc, 3); /* [x0][00][x2][x1] */
+ crc = asm_vpmsumd(p_my_lo, crc); /* [00][xx][xx][xx] */
+ crc ^= v0;
+
+ *pcrc = _gcry_bswap32(crc[VEC_U64_LO]);
+}
+
+
+static ASM_FUNC_ATTR_INLINE u32
+crc32_ppc8_ce_reduction_4 (u32 data, u32 crc,
+ const struct crc32_consts_s *consts)
+{
+ vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);
+ vector2x_u64 v0 = CRC_VEC_U64_DEF((u64)data << 32, 0);
+ v0 = asm_vpmsumd(v0, my_p); /* [00][x1][x0][00] */
+ v0[VEC_U64_LO] = 0; /* [00][x1][00][00] */
+ v0 = asm_vpmsumd(v0, my_p); /* [00][00][xx][xx] */
+ return _gcry_bswap32(v0[VEC_U64_LO]) ^ crc;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+ const struct crc32_consts_s *consts)
+{
+ u32 crc = *pcrc;
+ u32 data;
+
+ while (inlen >= 4)
+ {
+ data = buf_get_le32(inbuf);
+ data ^= crc;
+ data = _gcry_bswap32(data);
+
+ inlen -= 4;
+ inbuf += 4;
+
+ crc = crc32_ppc8_ce_reduction_4 (data, 0, consts);
+ }
+
+ switch (inlen)
+ {
+ case 0:
+ break;
+ case 1:
+ data = inbuf[0];
+ data ^= crc;
+ data = data & 0xffU;
+ crc = crc >> 8;
+ crc = crc32_ppc8_ce_reduction_4 (data, crc, consts);
+ break;
+ case 2:
+ data = inbuf[0] << 0;
+ data |= inbuf[1] << 8;
+ data ^= crc;
+ data = _gcry_bswap32(data << 16);
+ crc = crc >> 16;
+ crc = crc32_ppc8_ce_reduction_4 (data, crc, consts);
+ break;
+ case 3:
+ data = inbuf[0] << 0;
+ data |= inbuf[1] << 8;
+ data |= inbuf[2] << 16;
+ data ^= crc;
+ data = _gcry_bswap32(data << 8);
+ crc = crc >> 24;
+ crc = crc32_ppc8_ce_reduction_4 (data, crc, consts);
+ break;
+ }
+
+ *pcrc = crc;
+}
+
+void ASM_FUNC_ATTR
+_gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+ const struct crc32_consts_s *consts = &crc32_consts;
+
+ if (!inlen)
+ return;
+
+ if (inlen >= 16)
+ crc32r_ppc8_ce_bulk (pcrc, inbuf, inlen, consts);
+ else
+ crc32r_less_than_16 (pcrc, inbuf, inlen, consts);
+}
+
+void ASM_FUNC_ATTR
+_gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+ const struct crc32_consts_s *consts = &crc24rfc2440_consts;
+
+ if (!inlen)
+ return;
+
+ /* Note: *pcrc in input endian. */
+
+ if (inlen >= 16)
+ crc32_ppc8_ce_bulk (pcrc, inbuf, inlen, consts);
+ else
+ crc32_less_than_16 (pcrc, inbuf, inlen, consts);
+}
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/crc.c b/comm/third_party/libgcrypt/cipher/crc.c
new file mode 100644
index 0000000000..6d70f644f7
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/crc.c
@@ -0,0 +1,955 @@
+/* crc.c - Cyclic redundancy checks.
+ * Copyright (C) 2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+
+#include "bithelp.h"
+#include "bufhelp.h"
+
+
+/* USE_INTEL_PCLMUL indicates whether to compile CRC with Intel PCLMUL/SSE4.1
+ * code. */
+#undef USE_INTEL_PCLMUL
+#if defined(ENABLE_PCLMUL_SUPPORT) && defined(ENABLE_SSE41_SUPPORT)
+# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+# if __GNUC__ >= 4
+# define USE_INTEL_PCLMUL 1
+# endif
+# endif
+#endif /* USE_INTEL_PCLMUL */
+
+/* USE_ARM_PMULL indicates whether to compile CRC with ARMv8 PMULL code. */
+#undef USE_ARM_PMULL
+#if defined(ENABLE_ARM_CRYPTO_SUPPORT)
+# if defined(__AARCH64EL__) && \
+ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+# define USE_ARM_PMULL 1
+# endif
+#endif /* USE_ARM_PMULL */
+
+/* USE_PPC_VPMSUM indicates whether to enable PowerPC vector
+ * accelerated code. */
+#undef USE_PPC_VPMSUM
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+# if __GNUC__ >= 4
+# define USE_PPC_VPMSUM 1
+# endif
+# endif
+#endif /* USE_PPC_VPMSUM */
+
+
+typedef struct
+{
+ u32 CRC;
+#ifdef USE_INTEL_PCLMUL
+ unsigned int use_pclmul:1; /* Intel PCLMUL shall be used. */
+#endif
+#ifdef USE_ARM_PMULL
+ unsigned int use_pmull:1; /* ARMv8 PMULL shall be used. */
+#endif
+#ifdef USE_PPC_VPMSUM
+ unsigned int use_vpmsum:1; /* POWER vpmsum shall be used. */
+#endif
+ byte buf[4];
+}
+CRC_CONTEXT;
+
+
+#ifdef USE_INTEL_PCLMUL
+/*-- crc-intel-pclmul.c --*/
+void _gcry_crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen);
+void _gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf,
+ size_t inlen);
+#endif
+
+#ifdef USE_ARM_PMULL
+/*-- crc-armv8-ce.c --*/
+void _gcry_crc32_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen);
+void _gcry_crc24rfc2440_armv8_ce_pmull (u32 *pcrc, const byte *inbuf,
+ size_t inlen);
+#endif
+
+#ifdef USE_PPC_VPMSUM
+/*-- crc-ppc.c --*/
+void _gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen);
+void _gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf,
+ size_t inlen);
+#endif
+
+
+/*
+ * Code generated by universal_crc by Danjel McGougan
+ *
+ * CRC parameters used:
+ * bits: 32
+ * poly: 0x04c11db7
+ * init: 0xffffffff
+ * xor: 0xffffffff
+ * reverse: true
+ * non-direct: false
+ *
+ * CRC of the string "123456789" is 0xcbf43926
+ */
+
+static const u32 crc32_table[1024] = {
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
+ 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
+ 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+ 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
+ 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
+ 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
+ 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+ 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
+ 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
+ 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
+ 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
+ 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
+ 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
+ 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+ 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
+ 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
+ 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
+ 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+ 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
+ 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
+ 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
+ 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
+ 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+ 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
+ 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
+ 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
+ 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+ 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
+ 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
+ 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
+ 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
+ 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
+ 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
+ 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+ 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
+ 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
+ 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
+ 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+ 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
+ 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
+ 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
+ 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
+ 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+ 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
+ 0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3,
+ 0x646cc504, 0x7d77f445, 0x565aa786, 0x4f4196c7,
+ 0xc8d98a08, 0xd1c2bb49, 0xfaefe88a, 0xe3f4d9cb,
+ 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, 0x87981ccf,
+ 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192,
+ 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496,
+ 0x821b9859, 0x9b00a918, 0xb02dfadb, 0xa936cb9a,
+ 0xe6775d5d, 0xff6c6c1c, 0xd4413fdf, 0xcd5a0e9e,
+ 0x958424a2, 0x8c9f15e3, 0xa7b24620, 0xbea97761,
+ 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265,
+ 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69,
+ 0x39316bae, 0x202a5aef, 0x0b07092c, 0x121c386d,
+ 0xdf4636f3, 0xc65d07b2, 0xed705471, 0xf46b6530,
+ 0xbb2af3f7, 0xa231c2b6, 0x891c9175, 0x9007a034,
+ 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38,
+ 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c,
+ 0xf0794f05, 0xe9627e44, 0xc24f2d87, 0xdb541cc6,
+ 0x94158a01, 0x8d0ebb40, 0xa623e883, 0xbf38d9c2,
+ 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, 0x138d96ce,
+ 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca,
+ 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97,
+ 0xded79850, 0xc7cca911, 0xece1fad2, 0xf5facb93,
+ 0x7262d75c, 0x6b79e61d, 0x4054b5de, 0x594f849f,
+ 0x160e1258, 0x0f152319, 0x243870da, 0x3d23419b,
+ 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864,
+ 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60,
+ 0xad24e1af, 0xb43fd0ee, 0x9f12832d, 0x8609b26c,
+ 0xc94824ab, 0xd05315ea, 0xfb7e4629, 0xe2657768,
+ 0x2f3f79f6, 0x362448b7, 0x1d091b74, 0x04122a35,
+ 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31,
+ 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d,
+ 0x838a36fa, 0x9a9107bb, 0xb1bc5478, 0xa8a76539,
+ 0x3b83984b, 0x2298a90a, 0x09b5fac9, 0x10aecb88,
+ 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, 0x74c20e8c,
+ 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180,
+ 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484,
+ 0x71418a1a, 0x685abb5b, 0x4377e898, 0x5a6cd9d9,
+ 0x152d4f1e, 0x0c367e5f, 0x271b2d9c, 0x3e001cdd,
+ 0xb9980012, 0xa0833153, 0x8bae6290, 0x92b553d1,
+ 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5,
+ 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a,
+ 0xca6b79ed, 0xd37048ac, 0xf85d1b6f, 0xe1462a2e,
+ 0x66de36e1, 0x7fc507a0, 0x54e85463, 0x4df36522,
+ 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, 0x299fa026,
+ 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b,
+ 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f,
+ 0x2c1c24b0, 0x350715f1, 0x1e2a4632, 0x07317773,
+ 0x4870e1b4, 0x516bd0f5, 0x7a468336, 0x635db277,
+ 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, 0xe0d7848d,
+ 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189,
+ 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85,
+ 0x674f9842, 0x7e54a903, 0x5579fac0, 0x4c62cb81,
+ 0x8138c51f, 0x9823f45e, 0xb30ea79d, 0xaa1596dc,
+ 0xe554001b, 0xfc4f315a, 0xd7626299, 0xce7953d8,
+ 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4,
+ 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0,
+ 0x5e7ef3ec, 0x4765c2ad, 0x6c48916e, 0x7553a02f,
+ 0x3a1236e8, 0x230907a9, 0x0824546a, 0x113f652b,
+ 0x96a779e4, 0x8fbc48a5, 0xa4911b66, 0xbd8a2a27,
+ 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23,
+ 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e,
+ 0x70d024b9, 0x69cb15f8, 0x42e6463b, 0x5bfd777a,
+ 0xdc656bb5, 0xc57e5af4, 0xee530937, 0xf7483876,
+ 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, 0x9324fd72,
+ 0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59,
+ 0x0709a8dc, 0x06cbc2eb, 0x048d7cb2, 0x054f1685,
+ 0x0e1351b8, 0x0fd13b8f, 0x0d9785d6, 0x0c55efe1,
+ 0x091af964, 0x08d89353, 0x0a9e2d0a, 0x0b5c473d,
+ 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29,
+ 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5,
+ 0x1235f2c8, 0x13f798ff, 0x11b126a6, 0x10734c91,
+ 0x153c5a14, 0x14fe3023, 0x16b88e7a, 0x177ae44d,
+ 0x384d46e0, 0x398f2cd7, 0x3bc9928e, 0x3a0bf8b9,
+ 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065,
+ 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901,
+ 0x3157bf84, 0x3095d5b3, 0x32d36bea, 0x331101dd,
+ 0x246be590, 0x25a98fa7, 0x27ef31fe, 0x262d5bc9,
+ 0x23624d4c, 0x22a0277b, 0x20e69922, 0x2124f315,
+ 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71,
+ 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad,
+ 0x709a8dc0, 0x7158e7f7, 0x731e59ae, 0x72dc3399,
+ 0x7793251c, 0x76514f2b, 0x7417f172, 0x75d59b45,
+ 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, 0x7ccf6221,
+ 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd,
+ 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9,
+ 0x6bb5866c, 0x6a77ec5b, 0x68315202, 0x69f33835,
+ 0x62af7f08, 0x636d153f, 0x612bab66, 0x60e9c151,
+ 0x65a6d7d4, 0x6464bde3, 0x662203ba, 0x67e0698d,
+ 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579,
+ 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5,
+ 0x46c49a98, 0x4706f0af, 0x45404ef6, 0x448224c1,
+ 0x41cd3244, 0x400f5873, 0x4249e62a, 0x438b8c1d,
+ 0x54f16850, 0x55330267, 0x5775bc3e, 0x56b7d609,
+ 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5,
+ 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1,
+ 0x5deb9134, 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d,
+ 0xe1351b80, 0xe0f771b7, 0xe2b1cfee, 0xe373a5d9,
+ 0xe63cb35c, 0xe7fed96b, 0xe5b86732, 0xe47a0d05,
+ 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461,
+ 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd,
+ 0xfd13b8f0, 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9,
+ 0xfa1a102c, 0xfbd87a1b, 0xf99ec442, 0xf85cae75,
+ 0xf300e948, 0xf2c2837f, 0xf0843d26, 0xf1465711,
+ 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd,
+ 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339,
+ 0xde71f5bc, 0xdfb39f8b, 0xddf521d2, 0xdc374be5,
+ 0xd76b0cd8, 0xd6a966ef, 0xd4efd8b6, 0xd52db281,
+ 0xd062a404, 0xd1a0ce33, 0xd3e6706a, 0xd2241a5d,
+ 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049,
+ 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895,
+ 0xcb4dafa8, 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1,
+ 0xcc440774, 0xcd866d43, 0xcfc0d31a, 0xce02b92d,
+ 0x91af9640, 0x906dfc77, 0x922b422e, 0x93e92819,
+ 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5,
+ 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1,
+ 0x98b56f24, 0x99770513, 0x9b31bb4a, 0x9af3d17d,
+ 0x8d893530, 0x8c4b5f07, 0x8e0de15e, 0x8fcf8b69,
+ 0x8a809dec, 0x8b42f7db, 0x89044982, 0x88c623b5,
+ 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1,
+ 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d,
+ 0xa9e2d0a0, 0xa820ba97, 0xaa6604ce, 0xaba46ef9,
+ 0xaeeb787c, 0xaf29124b, 0xad6fac12, 0xacadc625,
+ 0xa7f18118, 0xa633eb2f, 0xa4755576, 0xa5b73f41,
+ 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d,
+ 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89,
+ 0xb2cddb0c, 0xb30fb13b, 0xb1490f62, 0xb08b6555,
+ 0xbbd72268, 0xba15485f, 0xb853f606, 0xb9919c31,
+ 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, 0xbe9834ed,
+ 0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee,
+ 0x8f629757, 0x37def032, 0x256b5fdc, 0x9dd738b9,
+ 0xc5b428ef, 0x7d084f8a, 0x6fbde064, 0xd7018701,
+ 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, 0x58631056,
+ 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871,
+ 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26,
+ 0x95ad7f70, 0x2d111815, 0x3fa4b7fb, 0x8718d09e,
+ 0x1acfe827, 0xa2738f42, 0xb0c620ac, 0x087a47c9,
+ 0xa032af3e, 0x188ec85b, 0x0a3b67b5, 0xb28700d0,
+ 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787,
+ 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f,
+ 0xeae41086, 0x525877e3, 0x40edd80d, 0xf851bf68,
+ 0xf02bf8a1, 0x48979fc4, 0x5a22302a, 0xe29e574f,
+ 0x7f496ff6, 0xc7f50893, 0xd540a77d, 0x6dfcc018,
+ 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0,
+ 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7,
+ 0x9b14583d, 0x23a83f58, 0x311d90b6, 0x89a1f7d3,
+ 0x1476cf6a, 0xaccaa80f, 0xbe7f07e1, 0x06c36084,
+ 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, 0x4c15df3c,
+ 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b,
+ 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c,
+ 0x446f98f5, 0xfcd3ff90, 0xee66507e, 0x56da371b,
+ 0x0eb9274d, 0xb6054028, 0xa4b0efc6, 0x1c0c88a3,
+ 0x81dbb01a, 0x3967d77f, 0x2bd27891, 0x936e1ff4,
+ 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed,
+ 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba,
+ 0xfe92dfec, 0x462eb889, 0x549b1767, 0xec277002,
+ 0x71f048bb, 0xc94c2fde, 0xdbf98030, 0x6345e755,
+ 0x6b3fa09c, 0xd383c7f9, 0xc1366817, 0x798a0f72,
+ 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825,
+ 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d,
+ 0x21e91f24, 0x99557841, 0x8be0d7af, 0x335cb0ca,
+ 0xed59b63b, 0x55e5d15e, 0x47507eb0, 0xffec19d5,
+ 0x623b216c, 0xda874609, 0xc832e9e7, 0x708e8e82,
+ 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a,
+ 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d,
+ 0xbd40e1a4, 0x05fc86c1, 0x1749292f, 0xaff54e4a,
+ 0x322276f3, 0x8a9e1196, 0x982bbe78, 0x2097d91d,
+ 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, 0x6a4166a5,
+ 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2,
+ 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb,
+ 0xc2098e52, 0x7ab5e937, 0x680046d9, 0xd0bc21bc,
+ 0x88df31ea, 0x3063568f, 0x22d6f961, 0x9a6a9e04,
+ 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, 0x15080953,
+ 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174,
+ 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623,
+ 0xd8c66675, 0x607a0110, 0x72cfaefe, 0xca73c99b,
+ 0x57a4f122, 0xef189647, 0xfdad39a9, 0x45115ecc,
+ 0x764dee06, 0xcef18963, 0xdc44268d, 0x64f841e8,
+ 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf,
+ 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907,
+ 0x3c9b51be, 0x842736db, 0x96929935, 0x2e2efe50,
+ 0x2654b999, 0x9ee8defc, 0x8c5d7112, 0x34e11677,
+ 0xa9362ece, 0x118a49ab, 0x033fe645, 0xbb838120,
+ 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98,
+ 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf,
+ 0xd67f4138, 0x6ec3265d, 0x7c7689b3, 0xc4caeed6,
+ 0x591dd66f, 0xe1a1b10a, 0xf3141ee4, 0x4ba87981,
+ 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, 0x017ec639,
+ 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e,
+ 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949,
+ 0x090481f0, 0xb1b8e695, 0xa30d497b, 0x1bb12e1e,
+ 0x43d23e48, 0xfb6e592d, 0xe9dbf6c3, 0x516791a6,
+ 0xccb0a91f, 0x740cce7a, 0x66b96194, 0xde0506f1
+};
+
+/* CRC32 */
+
+static inline u32
+crc32_next (u32 crc, byte data)
+{
+ return (crc >> 8) ^ crc32_table[(crc & 0xff) ^ data];
+}
+
+/*
+ * Process 4 bytes in one go
+ */
+static inline u32
+crc32_next4 (u32 crc, u32 data)
+{
+ crc ^= data;
+ crc = crc32_table[(crc & 0xff) + 0x300] ^
+ crc32_table[((crc >> 8) & 0xff) + 0x200] ^
+ crc32_table[((crc >> 16) & 0xff) + 0x100] ^
+ crc32_table[(crc >> 24) & 0xff];
+ return crc;
+}
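+
+/* Hypothetical self-check, not present in the original file: it verifies
+ * the check value quoted in the comment above ("123456789" -> 0xcbf43926)
+ * with the plain byte-wise routine, applying the documented init and xor
+ * values of 0xffffffff. */
+static int
+crc32_check_value (void)
+{
+  static const char msg[9] = { '1','2','3','4','5','6','7','8','9' };
+  u32 crc = 0xffffffffL;  /* init */
+  unsigned int i;
+
+  for (i = 0; i < sizeof (msg); i++)
+    crc = crc32_next (crc, msg[i]);
+
+  return (crc ^ 0xffffffffL) == 0xcbf43926;  /* apply xor, then compare */
+}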
+
+static void
+crc32_init (void *context, unsigned int flags)
+{
+ CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+ u32 hwf = _gcry_get_hw_features ();
+
+#ifdef USE_INTEL_PCLMUL
+ ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);
+#endif
+#ifdef USE_ARM_PMULL
+ ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL);
+#endif
+#ifdef USE_PPC_VPMSUM
+ ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);
+#endif
+
+ (void)flags;
+ (void)hwf;
+
+ ctx->CRC = 0 ^ 0xffffffffL;
+}
+
+static void
+crc32_write (void *context, const void *inbuf_arg, size_t inlen)
+{
+ CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+ const byte *inbuf = inbuf_arg;
+ u32 crc;
+
+#ifdef USE_INTEL_PCLMUL
+ if (ctx->use_pclmul)
+ {
+ _gcry_crc32_intel_pclmul(&ctx->CRC, inbuf, inlen);
+ return;
+ }
+#endif
+#ifdef USE_ARM_PMULL
+ if (ctx->use_pmull)
+ {
+ _gcry_crc32_armv8_ce_pmull(&ctx->CRC, inbuf, inlen);
+ return;
+ }
+#endif
+#ifdef USE_PPC_VPMSUM
+ if (ctx->use_vpmsum)
+ {
+ _gcry_crc32_ppc8_vpmsum(&ctx->CRC, inbuf, inlen);
+ return;
+ }
+#endif
+
+ if (!inbuf || !inlen)
+ return;
+
+ crc = ctx->CRC;
+
+ while (inlen >= 16)
+ {
+ inlen -= 16;
+ crc = crc32_next4(crc, buf_get_le32(&inbuf[0]));
+ crc = crc32_next4(crc, buf_get_le32(&inbuf[4]));
+ crc = crc32_next4(crc, buf_get_le32(&inbuf[8]));
+ crc = crc32_next4(crc, buf_get_le32(&inbuf[12]));
+ inbuf += 16;
+ }
+
+ while (inlen >= 4)
+ {
+ inlen -= 4;
+ crc = crc32_next4(crc, buf_get_le32(inbuf));
+ inbuf += 4;
+ }
+
+ while (inlen--)
+ {
+ crc = crc32_next(crc, *inbuf++);
+ }
+
+ ctx->CRC = crc;
+}
+
+static byte *
+crc32_read (void *context)
+{
+ CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+ return ctx->buf;
+}
+
+static void
+crc32_final (void *context)
+{
+ CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+ ctx->CRC ^= 0xffffffffL;
+ buf_put_be32 (ctx->buf, ctx->CRC);
+}
+
+/* CRC32 a'la RFC 1510 */
+/* CRC of the string "123456789" is 0x2dfd2d88 */
+
+static void
+crc32rfc1510_init (void *context, unsigned int flags)
+{
+ CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+ u32 hwf = _gcry_get_hw_features ();
+
+#ifdef USE_INTEL_PCLMUL
+ ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);
+#endif
+#ifdef USE_ARM_PMULL
+ ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL);
+#endif
+#ifdef USE_PPC_VPMSUM
+ ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);
+#endif
+
+ (void)flags;
+ (void)hwf;
+
+ ctx->CRC = 0;
+}
+
+static void
+crc32rfc1510_final (void *context)
+{
+ CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+ buf_put_be32(ctx->buf, ctx->CRC);
+}
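+
+/* Hypothetical self-check, not present in the original file: the RFC 1510
+ * variant reuses crc32_write()/crc32_next() and only differs in starting
+ * from 0 and skipping the final xor, which is why just the init and final
+ * functions above are replaced.  Per the comment above, "123456789" then
+ * yields 0x2dfd2d88. */
+static int
+crc32rfc1510_check_value (void)
+{
+  static const char msg[9] = { '1','2','3','4','5','6','7','8','9' };
+  u32 crc = 0;  /* RFC 1510 init value */
+  unsigned int i;
+
+  for (i = 0; i < sizeof (msg); i++)
+    crc = crc32_next (crc, msg[i]);
+
+  return crc == 0x2dfd2d88;  /* no final xor */
+}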
+
+/* CRC24 a'la RFC 2440 */
+/*
+ * Code generated by universal_crc by Danjel McGougan
+ *
+ * CRC parameters used:
+ * bits: 24
+ * poly: 0x864cfb
+ * init: 0xb704ce
+ * xor: 0x000000
+ * reverse: false
+ * non-direct: false
+ *
+ * CRC of the string "123456789" is 0x21cf02
+ */
+
+static const u32 crc24_table[1024] =
+{
+ 0x00000000, 0x00fb4c86, 0x000dd58a, 0x00f6990c,
+ 0x00e1e693, 0x001aaa15, 0x00ec3319, 0x00177f9f,
+ 0x003981a1, 0x00c2cd27, 0x0034542b, 0x00cf18ad,
+ 0x00d86732, 0x00232bb4, 0x00d5b2b8, 0x002efe3e,
+ 0x00894ec5, 0x00720243, 0x00849b4f, 0x007fd7c9,
+ 0x0068a856, 0x0093e4d0, 0x00657ddc, 0x009e315a,
+ 0x00b0cf64, 0x004b83e2, 0x00bd1aee, 0x00465668,
+ 0x005129f7, 0x00aa6571, 0x005cfc7d, 0x00a7b0fb,
+ 0x00e9d10c, 0x00129d8a, 0x00e40486, 0x001f4800,
+ 0x0008379f, 0x00f37b19, 0x0005e215, 0x00feae93,
+ 0x00d050ad, 0x002b1c2b, 0x00dd8527, 0x0026c9a1,
+ 0x0031b63e, 0x00cafab8, 0x003c63b4, 0x00c72f32,
+ 0x00609fc9, 0x009bd34f, 0x006d4a43, 0x009606c5,
+ 0x0081795a, 0x007a35dc, 0x008cacd0, 0x0077e056,
+ 0x00591e68, 0x00a252ee, 0x0054cbe2, 0x00af8764,
+ 0x00b8f8fb, 0x0043b47d, 0x00b52d71, 0x004e61f7,
+ 0x00d2a319, 0x0029ef9f, 0x00df7693, 0x00243a15,
+ 0x0033458a, 0x00c8090c, 0x003e9000, 0x00c5dc86,
+ 0x00eb22b8, 0x00106e3e, 0x00e6f732, 0x001dbbb4,
+ 0x000ac42b, 0x00f188ad, 0x000711a1, 0x00fc5d27,
+ 0x005beddc, 0x00a0a15a, 0x00563856, 0x00ad74d0,
+ 0x00ba0b4f, 0x004147c9, 0x00b7dec5, 0x004c9243,
+ 0x00626c7d, 0x009920fb, 0x006fb9f7, 0x0094f571,
+ 0x00838aee, 0x0078c668, 0x008e5f64, 0x007513e2,
+ 0x003b7215, 0x00c03e93, 0x0036a79f, 0x00cdeb19,
+ 0x00da9486, 0x0021d800, 0x00d7410c, 0x002c0d8a,
+ 0x0002f3b4, 0x00f9bf32, 0x000f263e, 0x00f46ab8,
+ 0x00e31527, 0x001859a1, 0x00eec0ad, 0x00158c2b,
+ 0x00b23cd0, 0x00497056, 0x00bfe95a, 0x0044a5dc,
+ 0x0053da43, 0x00a896c5, 0x005e0fc9, 0x00a5434f,
+ 0x008bbd71, 0x0070f1f7, 0x008668fb, 0x007d247d,
+ 0x006a5be2, 0x00911764, 0x00678e68, 0x009cc2ee,
+ 0x00a44733, 0x005f0bb5, 0x00a992b9, 0x0052de3f,
+ 0x0045a1a0, 0x00beed26, 0x0048742a, 0x00b338ac,
+ 0x009dc692, 0x00668a14, 0x00901318, 0x006b5f9e,
+ 0x007c2001, 0x00876c87, 0x0071f58b, 0x008ab90d,
+ 0x002d09f6, 0x00d64570, 0x0020dc7c, 0x00db90fa,
+ 0x00ccef65, 0x0037a3e3, 0x00c13aef, 0x003a7669,
+ 0x00148857, 0x00efc4d1, 0x00195ddd, 0x00e2115b,
+ 0x00f56ec4, 0x000e2242, 0x00f8bb4e, 0x0003f7c8,
+ 0x004d963f, 0x00b6dab9, 0x004043b5, 0x00bb0f33,
+ 0x00ac70ac, 0x00573c2a, 0x00a1a526, 0x005ae9a0,
+ 0x0074179e, 0x008f5b18, 0x0079c214, 0x00828e92,
+ 0x0095f10d, 0x006ebd8b, 0x00982487, 0x00636801,
+ 0x00c4d8fa, 0x003f947c, 0x00c90d70, 0x003241f6,
+ 0x00253e69, 0x00de72ef, 0x0028ebe3, 0x00d3a765,
+ 0x00fd595b, 0x000615dd, 0x00f08cd1, 0x000bc057,
+ 0x001cbfc8, 0x00e7f34e, 0x00116a42, 0x00ea26c4,
+ 0x0076e42a, 0x008da8ac, 0x007b31a0, 0x00807d26,
+ 0x009702b9, 0x006c4e3f, 0x009ad733, 0x00619bb5,
+ 0x004f658b, 0x00b4290d, 0x0042b001, 0x00b9fc87,
+ 0x00ae8318, 0x0055cf9e, 0x00a35692, 0x00581a14,
+ 0x00ffaaef, 0x0004e669, 0x00f27f65, 0x000933e3,
+ 0x001e4c7c, 0x00e500fa, 0x001399f6, 0x00e8d570,
+ 0x00c62b4e, 0x003d67c8, 0x00cbfec4, 0x0030b242,
+ 0x0027cddd, 0x00dc815b, 0x002a1857, 0x00d154d1,
+ 0x009f3526, 0x006479a0, 0x0092e0ac, 0x0069ac2a,
+ 0x007ed3b5, 0x00859f33, 0x0073063f, 0x00884ab9,
+ 0x00a6b487, 0x005df801, 0x00ab610d, 0x00502d8b,
+ 0x00475214, 0x00bc1e92, 0x004a879e, 0x00b1cb18,
+ 0x00167be3, 0x00ed3765, 0x001bae69, 0x00e0e2ef,
+ 0x00f79d70, 0x000cd1f6, 0x00fa48fa, 0x0001047c,
+ 0x002ffa42, 0x00d4b6c4, 0x00222fc8, 0x00d9634e,
+ 0x00ce1cd1, 0x00355057, 0x00c3c95b, 0x003885dd,
+ 0x00000000, 0x00488f66, 0x00901ecd, 0x00d891ab,
+ 0x00db711c, 0x0093fe7a, 0x004b6fd1, 0x0003e0b7,
+ 0x00b6e338, 0x00fe6c5e, 0x0026fdf5, 0x006e7293,
+ 0x006d9224, 0x00251d42, 0x00fd8ce9, 0x00b5038f,
+ 0x006cc771, 0x00244817, 0x00fcd9bc, 0x00b456da,
+ 0x00b7b66d, 0x00ff390b, 0x0027a8a0, 0x006f27c6,
+ 0x00da2449, 0x0092ab2f, 0x004a3a84, 0x0002b5e2,
+ 0x00015555, 0x0049da33, 0x00914b98, 0x00d9c4fe,
+ 0x00d88ee3, 0x00900185, 0x0048902e, 0x00001f48,
+ 0x0003ffff, 0x004b7099, 0x0093e132, 0x00db6e54,
+ 0x006e6ddb, 0x0026e2bd, 0x00fe7316, 0x00b6fc70,
+ 0x00b51cc7, 0x00fd93a1, 0x0025020a, 0x006d8d6c,
+ 0x00b44992, 0x00fcc6f4, 0x0024575f, 0x006cd839,
+ 0x006f388e, 0x0027b7e8, 0x00ff2643, 0x00b7a925,
+ 0x0002aaaa, 0x004a25cc, 0x0092b467, 0x00da3b01,
+ 0x00d9dbb6, 0x009154d0, 0x0049c57b, 0x00014a1d,
+ 0x004b5141, 0x0003de27, 0x00db4f8c, 0x0093c0ea,
+ 0x0090205d, 0x00d8af3b, 0x00003e90, 0x0048b1f6,
+ 0x00fdb279, 0x00b53d1f, 0x006dacb4, 0x002523d2,
+ 0x0026c365, 0x006e4c03, 0x00b6dda8, 0x00fe52ce,
+ 0x00279630, 0x006f1956, 0x00b788fd, 0x00ff079b,
+ 0x00fce72c, 0x00b4684a, 0x006cf9e1, 0x00247687,
+ 0x00917508, 0x00d9fa6e, 0x00016bc5, 0x0049e4a3,
+ 0x004a0414, 0x00028b72, 0x00da1ad9, 0x009295bf,
+ 0x0093dfa2, 0x00db50c4, 0x0003c16f, 0x004b4e09,
+ 0x0048aebe, 0x000021d8, 0x00d8b073, 0x00903f15,
+ 0x00253c9a, 0x006db3fc, 0x00b52257, 0x00fdad31,
+ 0x00fe4d86, 0x00b6c2e0, 0x006e534b, 0x0026dc2d,
+ 0x00ff18d3, 0x00b797b5, 0x006f061e, 0x00278978,
+ 0x002469cf, 0x006ce6a9, 0x00b47702, 0x00fcf864,
+ 0x0049fbeb, 0x0001748d, 0x00d9e526, 0x00916a40,
+ 0x00928af7, 0x00da0591, 0x0002943a, 0x004a1b5c,
+ 0x0096a282, 0x00de2de4, 0x0006bc4f, 0x004e3329,
+ 0x004dd39e, 0x00055cf8, 0x00ddcd53, 0x00954235,
+ 0x002041ba, 0x0068cedc, 0x00b05f77, 0x00f8d011,
+ 0x00fb30a6, 0x00b3bfc0, 0x006b2e6b, 0x0023a10d,
+ 0x00fa65f3, 0x00b2ea95, 0x006a7b3e, 0x0022f458,
+ 0x002114ef, 0x00699b89, 0x00b10a22, 0x00f98544,
+ 0x004c86cb, 0x000409ad, 0x00dc9806, 0x00941760,
+ 0x0097f7d7, 0x00df78b1, 0x0007e91a, 0x004f667c,
+ 0x004e2c61, 0x0006a307, 0x00de32ac, 0x0096bdca,
+ 0x00955d7d, 0x00ddd21b, 0x000543b0, 0x004dccd6,
+ 0x00f8cf59, 0x00b0403f, 0x0068d194, 0x00205ef2,
+ 0x0023be45, 0x006b3123, 0x00b3a088, 0x00fb2fee,
+ 0x0022eb10, 0x006a6476, 0x00b2f5dd, 0x00fa7abb,
+ 0x00f99a0c, 0x00b1156a, 0x006984c1, 0x00210ba7,
+ 0x00940828, 0x00dc874e, 0x000416e5, 0x004c9983,
+ 0x004f7934, 0x0007f652, 0x00df67f9, 0x0097e89f,
+ 0x00ddf3c3, 0x00957ca5, 0x004ded0e, 0x00056268,
+ 0x000682df, 0x004e0db9, 0x00969c12, 0x00de1374,
+ 0x006b10fb, 0x00239f9d, 0x00fb0e36, 0x00b38150,
+ 0x00b061e7, 0x00f8ee81, 0x00207f2a, 0x0068f04c,
+ 0x00b134b2, 0x00f9bbd4, 0x00212a7f, 0x0069a519,
+ 0x006a45ae, 0x0022cac8, 0x00fa5b63, 0x00b2d405,
+ 0x0007d78a, 0x004f58ec, 0x0097c947, 0x00df4621,
+ 0x00dca696, 0x009429f0, 0x004cb85b, 0x0004373d,
+ 0x00057d20, 0x004df246, 0x009563ed, 0x00ddec8b,
+ 0x00de0c3c, 0x0096835a, 0x004e12f1, 0x00069d97,
+ 0x00b39e18, 0x00fb117e, 0x002380d5, 0x006b0fb3,
+ 0x0068ef04, 0x00206062, 0x00f8f1c9, 0x00b07eaf,
+ 0x0069ba51, 0x00213537, 0x00f9a49c, 0x00b12bfa,
+ 0x00b2cb4d, 0x00fa442b, 0x0022d580, 0x006a5ae6,
+ 0x00df5969, 0x0097d60f, 0x004f47a4, 0x0007c8c2,
+ 0x00042875, 0x004ca713, 0x009436b8, 0x00dcb9de,
+ 0x00000000, 0x00d70983, 0x00555f80, 0x00825603,
+ 0x0051f286, 0x0086fb05, 0x0004ad06, 0x00d3a485,
+ 0x0059a88b, 0x008ea108, 0x000cf70b, 0x00dbfe88,
+ 0x00085a0d, 0x00df538e, 0x005d058d, 0x008a0c0e,
+ 0x00491c91, 0x009e1512, 0x001c4311, 0x00cb4a92,
+ 0x0018ee17, 0x00cfe794, 0x004db197, 0x009ab814,
+ 0x0010b41a, 0x00c7bd99, 0x0045eb9a, 0x0092e219,
+ 0x0041469c, 0x00964f1f, 0x0014191c, 0x00c3109f,
+ 0x006974a4, 0x00be7d27, 0x003c2b24, 0x00eb22a7,
+ 0x00388622, 0x00ef8fa1, 0x006dd9a2, 0x00bad021,
+ 0x0030dc2f, 0x00e7d5ac, 0x006583af, 0x00b28a2c,
+ 0x00612ea9, 0x00b6272a, 0x00347129, 0x00e378aa,
+ 0x00206835, 0x00f761b6, 0x007537b5, 0x00a23e36,
+ 0x00719ab3, 0x00a69330, 0x0024c533, 0x00f3ccb0,
+ 0x0079c0be, 0x00aec93d, 0x002c9f3e, 0x00fb96bd,
+ 0x00283238, 0x00ff3bbb, 0x007d6db8, 0x00aa643b,
+ 0x0029a4ce, 0x00fead4d, 0x007cfb4e, 0x00abf2cd,
+ 0x00785648, 0x00af5fcb, 0x002d09c8, 0x00fa004b,
+ 0x00700c45, 0x00a705c6, 0x002553c5, 0x00f25a46,
+ 0x0021fec3, 0x00f6f740, 0x0074a143, 0x00a3a8c0,
+ 0x0060b85f, 0x00b7b1dc, 0x0035e7df, 0x00e2ee5c,
+ 0x00314ad9, 0x00e6435a, 0x00641559, 0x00b31cda,
+ 0x003910d4, 0x00ee1957, 0x006c4f54, 0x00bb46d7,
+ 0x0068e252, 0x00bfebd1, 0x003dbdd2, 0x00eab451,
+ 0x0040d06a, 0x0097d9e9, 0x00158fea, 0x00c28669,
+ 0x001122ec, 0x00c62b6f, 0x00447d6c, 0x009374ef,
+ 0x001978e1, 0x00ce7162, 0x004c2761, 0x009b2ee2,
+ 0x00488a67, 0x009f83e4, 0x001dd5e7, 0x00cadc64,
+ 0x0009ccfb, 0x00dec578, 0x005c937b, 0x008b9af8,
+ 0x00583e7d, 0x008f37fe, 0x000d61fd, 0x00da687e,
+ 0x00506470, 0x00876df3, 0x00053bf0, 0x00d23273,
+ 0x000196f6, 0x00d69f75, 0x0054c976, 0x0083c0f5,
+ 0x00a9041b, 0x007e0d98, 0x00fc5b9b, 0x002b5218,
+ 0x00f8f69d, 0x002fff1e, 0x00ada91d, 0x007aa09e,
+ 0x00f0ac90, 0x0027a513, 0x00a5f310, 0x0072fa93,
+ 0x00a15e16, 0x00765795, 0x00f40196, 0x00230815,
+ 0x00e0188a, 0x00371109, 0x00b5470a, 0x00624e89,
+ 0x00b1ea0c, 0x0066e38f, 0x00e4b58c, 0x0033bc0f,
+ 0x00b9b001, 0x006eb982, 0x00ecef81, 0x003be602,
+ 0x00e84287, 0x003f4b04, 0x00bd1d07, 0x006a1484,
+ 0x00c070bf, 0x0017793c, 0x00952f3f, 0x004226bc,
+ 0x00918239, 0x00468bba, 0x00c4ddb9, 0x0013d43a,
+ 0x0099d834, 0x004ed1b7, 0x00cc87b4, 0x001b8e37,
+ 0x00c82ab2, 0x001f2331, 0x009d7532, 0x004a7cb1,
+ 0x00896c2e, 0x005e65ad, 0x00dc33ae, 0x000b3a2d,
+ 0x00d89ea8, 0x000f972b, 0x008dc128, 0x005ac8ab,
+ 0x00d0c4a5, 0x0007cd26, 0x00859b25, 0x005292a6,
+ 0x00813623, 0x00563fa0, 0x00d469a3, 0x00036020,
+ 0x0080a0d5, 0x0057a956, 0x00d5ff55, 0x0002f6d6,
+ 0x00d15253, 0x00065bd0, 0x00840dd3, 0x00530450,
+ 0x00d9085e, 0x000e01dd, 0x008c57de, 0x005b5e5d,
+ 0x0088fad8, 0x005ff35b, 0x00dda558, 0x000aacdb,
+ 0x00c9bc44, 0x001eb5c7, 0x009ce3c4, 0x004bea47,
+ 0x00984ec2, 0x004f4741, 0x00cd1142, 0x001a18c1,
+ 0x009014cf, 0x00471d4c, 0x00c54b4f, 0x001242cc,
+ 0x00c1e649, 0x0016efca, 0x0094b9c9, 0x0043b04a,
+ 0x00e9d471, 0x003eddf2, 0x00bc8bf1, 0x006b8272,
+ 0x00b826f7, 0x006f2f74, 0x00ed7977, 0x003a70f4,
+ 0x00b07cfa, 0x00677579, 0x00e5237a, 0x00322af9,
+ 0x00e18e7c, 0x003687ff, 0x00b4d1fc, 0x0063d87f,
+ 0x00a0c8e0, 0x0077c163, 0x00f59760, 0x00229ee3,
+ 0x00f13a66, 0x002633e5, 0x00a465e6, 0x00736c65,
+ 0x00f9606b, 0x002e69e8, 0x00ac3feb, 0x007b3668,
+ 0x00a892ed, 0x007f9b6e, 0x00fdcd6d, 0x002ac4ee,
+ 0x00000000, 0x00520936, 0x00a4126c, 0x00f61b5a,
+ 0x004825d8, 0x001a2cee, 0x00ec37b4, 0x00be3e82,
+ 0x006b0636, 0x00390f00, 0x00cf145a, 0x009d1d6c,
+ 0x002323ee, 0x00712ad8, 0x00873182, 0x00d538b4,
+ 0x00d60c6c, 0x0084055a, 0x00721e00, 0x00201736,
+ 0x009e29b4, 0x00cc2082, 0x003a3bd8, 0x006832ee,
+ 0x00bd0a5a, 0x00ef036c, 0x00191836, 0x004b1100,
+ 0x00f52f82, 0x00a726b4, 0x00513dee, 0x000334d8,
+ 0x00ac19d8, 0x00fe10ee, 0x00080bb4, 0x005a0282,
+ 0x00e43c00, 0x00b63536, 0x00402e6c, 0x0012275a,
+ 0x00c71fee, 0x009516d8, 0x00630d82, 0x003104b4,
+ 0x008f3a36, 0x00dd3300, 0x002b285a, 0x0079216c,
+ 0x007a15b4, 0x00281c82, 0x00de07d8, 0x008c0eee,
+ 0x0032306c, 0x0060395a, 0x00962200, 0x00c42b36,
+ 0x00111382, 0x00431ab4, 0x00b501ee, 0x00e708d8,
+ 0x0059365a, 0x000b3f6c, 0x00fd2436, 0x00af2d00,
+ 0x00a37f36, 0x00f17600, 0x00076d5a, 0x0055646c,
+ 0x00eb5aee, 0x00b953d8, 0x004f4882, 0x001d41b4,
+ 0x00c87900, 0x009a7036, 0x006c6b6c, 0x003e625a,
+ 0x00805cd8, 0x00d255ee, 0x00244eb4, 0x00764782,
+ 0x0075735a, 0x00277a6c, 0x00d16136, 0x00836800,
+ 0x003d5682, 0x006f5fb4, 0x009944ee, 0x00cb4dd8,
+ 0x001e756c, 0x004c7c5a, 0x00ba6700, 0x00e86e36,
+ 0x005650b4, 0x00045982, 0x00f242d8, 0x00a04bee,
+ 0x000f66ee, 0x005d6fd8, 0x00ab7482, 0x00f97db4,
+ 0x00474336, 0x00154a00, 0x00e3515a, 0x00b1586c,
+ 0x006460d8, 0x003669ee, 0x00c072b4, 0x00927b82,
+ 0x002c4500, 0x007e4c36, 0x0088576c, 0x00da5e5a,
+ 0x00d96a82, 0x008b63b4, 0x007d78ee, 0x002f71d8,
+ 0x00914f5a, 0x00c3466c, 0x00355d36, 0x00675400,
+ 0x00b26cb4, 0x00e06582, 0x00167ed8, 0x004477ee,
+ 0x00fa496c, 0x00a8405a, 0x005e5b00, 0x000c5236,
+ 0x0046ff6c, 0x0014f65a, 0x00e2ed00, 0x00b0e436,
+ 0x000edab4, 0x005cd382, 0x00aac8d8, 0x00f8c1ee,
+ 0x002df95a, 0x007ff06c, 0x0089eb36, 0x00dbe200,
+ 0x0065dc82, 0x0037d5b4, 0x00c1ceee, 0x0093c7d8,
+ 0x0090f300, 0x00c2fa36, 0x0034e16c, 0x0066e85a,
+ 0x00d8d6d8, 0x008adfee, 0x007cc4b4, 0x002ecd82,
+ 0x00fbf536, 0x00a9fc00, 0x005fe75a, 0x000dee6c,
+ 0x00b3d0ee, 0x00e1d9d8, 0x0017c282, 0x0045cbb4,
+ 0x00eae6b4, 0x00b8ef82, 0x004ef4d8, 0x001cfdee,
+ 0x00a2c36c, 0x00f0ca5a, 0x0006d100, 0x0054d836,
+ 0x0081e082, 0x00d3e9b4, 0x0025f2ee, 0x0077fbd8,
+ 0x00c9c55a, 0x009bcc6c, 0x006dd736, 0x003fde00,
+ 0x003cead8, 0x006ee3ee, 0x0098f8b4, 0x00caf182,
+ 0x0074cf00, 0x0026c636, 0x00d0dd6c, 0x0082d45a,
+ 0x0057ecee, 0x0005e5d8, 0x00f3fe82, 0x00a1f7b4,
+ 0x001fc936, 0x004dc000, 0x00bbdb5a, 0x00e9d26c,
+ 0x00e5805a, 0x00b7896c, 0x00419236, 0x00139b00,
+ 0x00ada582, 0x00ffacb4, 0x0009b7ee, 0x005bbed8,
+ 0x008e866c, 0x00dc8f5a, 0x002a9400, 0x00789d36,
+ 0x00c6a3b4, 0x0094aa82, 0x0062b1d8, 0x0030b8ee,
+ 0x00338c36, 0x00618500, 0x00979e5a, 0x00c5976c,
+ 0x007ba9ee, 0x0029a0d8, 0x00dfbb82, 0x008db2b4,
+ 0x00588a00, 0x000a8336, 0x00fc986c, 0x00ae915a,
+ 0x0010afd8, 0x0042a6ee, 0x00b4bdb4, 0x00e6b482,
+ 0x00499982, 0x001b90b4, 0x00ed8bee, 0x00bf82d8,
+ 0x0001bc5a, 0x0053b56c, 0x00a5ae36, 0x00f7a700,
+ 0x00229fb4, 0x00709682, 0x00868dd8, 0x00d484ee,
+ 0x006aba6c, 0x0038b35a, 0x00cea800, 0x009ca136,
+ 0x009f95ee, 0x00cd9cd8, 0x003b8782, 0x00698eb4,
+ 0x00d7b036, 0x0085b900, 0x0073a25a, 0x0021ab6c,
+ 0x00f493d8, 0x00a69aee, 0x005081b4, 0x00028882,
+ 0x00bcb600, 0x00eebf36, 0x0018a46c, 0x004aad5a
+};
+
+static inline
+u32 crc24_init (void)
+{
+  /* Transformed to a 32-bit CRC by multiplying by x⁸ and then byte swapping. */
+  return 0xce04b7; /* _gcry_bswap32(0xb704ce << 8) */
+}
+
+static inline
+u32 crc24_next (u32 crc, byte data)
+{
+ return (crc >> 8) ^ crc24_table[(crc & 0xff) ^ data];
+}
+
+/*
+ * Process 4 bytes in one go
+ */
+static inline
+u32 crc24_next4 (u32 crc, u32 data)
+{
+ crc ^= data;
+ crc = crc24_table[(crc & 0xff) + 0x300] ^
+ crc24_table[((crc >> 8) & 0xff) + 0x200] ^
+ crc24_table[((crc >> 16) & 0xff) + 0x100] ^
+ crc24_table[(data >> 24) & 0xff];
+ return crc;
+}
+
+static inline
+u32 crc24_final (u32 crc)
+{
+ return crc & 0xffffff;
+}
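For reference, the crc24_* helpers above are a table-driven, byte-swapped form of the CRC-24 that RFC 2440/4880 defines for OpenPGP ASCII armor (initial value 0xb704ce, generator 0x1864cfb). A minimal bit-at-a-time sketch of that reference definition, useful only for cross-checking and not part of libgcrypt (the function name is illustrative):

    #include <stddef.h>
    #include <stdint.h>

    #define CRC24_REF_INIT 0xb704ceUL   /* initial value from RFC 2440/4880 */
    #define CRC24_REF_POLY 0x1864cfbUL  /* CRC-24 generator polynomial */

    /* Bitwise CRC-24 over a buffer; the optimized code above is designed to
       produce the same 24-bit value, modulo its internal byte-swapped
       representation. */
    static uint32_t
    crc24_reference (const unsigned char *buf, size_t len)
    {
      uint32_t crc = CRC24_REF_INIT;
      int i;

      while (len--)
        {
          crc ^= (uint32_t)(*buf++) << 16;
          for (i = 0; i < 8; i++)
            {
              crc <<= 1;
              if (crc & 0x1000000UL)
                crc ^= CRC24_REF_POLY;
            }
        }
      return crc & 0xffffffUL;
    }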
+
+static void
+crc24rfc2440_init (void *context, unsigned int flags)
+{
+ CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+ u32 hwf = _gcry_get_hw_features ();
+
+#ifdef USE_INTEL_PCLMUL
+ ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);
+#endif
+#ifdef USE_ARM_PMULL
+ ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL);
+#endif
+#ifdef USE_PPC_VPMSUM
+ ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);
+#endif
+
+ (void)hwf;
+ (void)flags;
+
+ ctx->CRC = crc24_init();
+}
+
+static void
+crc24rfc2440_write (void *context, const void *inbuf_arg, size_t inlen)
+{
+ const unsigned char *inbuf = inbuf_arg;
+ CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+ u32 crc;
+
+#ifdef USE_INTEL_PCLMUL
+ if (ctx->use_pclmul)
+ {
+ _gcry_crc24rfc2440_intel_pclmul(&ctx->CRC, inbuf, inlen);
+ return;
+ }
+#endif
+#ifdef USE_ARM_PMULL
+ if (ctx->use_pmull)
+ {
+ _gcry_crc24rfc2440_armv8_ce_pmull(&ctx->CRC, inbuf, inlen);
+ return;
+ }
+#endif
+#ifdef USE_PPC_VPMSUM
+ if (ctx->use_vpmsum)
+ {
+ _gcry_crc24rfc2440_ppc8_vpmsum(&ctx->CRC, inbuf, inlen);
+ return;
+ }
+#endif
+
+ if (!inbuf || !inlen)
+ return;
+
+ crc = ctx->CRC;
+
+ while (inlen >= 16)
+ {
+ inlen -= 16;
+ crc = crc24_next4(crc, buf_get_le32(&inbuf[0]));
+ crc = crc24_next4(crc, buf_get_le32(&inbuf[4]));
+ crc = crc24_next4(crc, buf_get_le32(&inbuf[8]));
+ crc = crc24_next4(crc, buf_get_le32(&inbuf[12]));
+ inbuf += 16;
+ }
+
+ while (inlen >= 4)
+ {
+ inlen -= 4;
+ crc = crc24_next4(crc, buf_get_le32(inbuf));
+ inbuf += 4;
+ }
+
+ while (inlen--)
+ {
+ crc = crc24_next(crc, *inbuf++);
+ }
+
+ ctx->CRC = crc;
+}
+
+static void
+crc24rfc2440_final (void *context)
+{
+ CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+ ctx->CRC = crc24_final(ctx->CRC);
+ buf_put_le32 (ctx->buf, ctx->CRC);
+}
+
+/* We allow the CRC algorithms even in FIPS mode because they are
+ not actually cryptographic primitives. */
+
+gcry_md_spec_t _gcry_digest_spec_crc32 =
+ {
+ GCRY_MD_CRC32, {0, 1},
+ "CRC32", NULL, 0, NULL, 4,
+ crc32_init, crc32_write, crc32_final, crc32_read, NULL,
+ NULL, NULL,
+ sizeof (CRC_CONTEXT)
+ };
+
+gcry_md_spec_t _gcry_digest_spec_crc32_rfc1510 =
+ {
+ GCRY_MD_CRC32_RFC1510, {0, 1},
+ "CRC32RFC1510", NULL, 0, NULL, 4,
+ crc32rfc1510_init, crc32_write, crc32rfc1510_final, crc32_read, NULL,
+ NULL, NULL,
+ sizeof (CRC_CONTEXT)
+ };
+
+gcry_md_spec_t _gcry_digest_spec_crc24_rfc2440 =
+ {
+ GCRY_MD_CRC24_RFC2440, {0, 1},
+ "CRC24RFC2440", NULL, 0, NULL, 3,
+ crc24rfc2440_init, crc24rfc2440_write, crc24rfc2440_final, crc32_read, NULL,
+ NULL, NULL,
+ sizeof (CRC_CONTEXT)
+ };
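Applications do not use these spec structures directly; they are registered with the message-digest subsystem and reached through the generic gcry_md interface. A short usage sketch for the CRC-24 spec registered above (error handling trimmed to a single check):

    #include <stdio.h>
    #include <gcrypt.h>

    int
    main (void)
    {
      gcry_md_hd_t hd;
      unsigned char *digest;
      unsigned int dlen, i;

      if (gcry_md_open (&hd, GCRY_MD_CRC24_RFC2440, 0))
        return 1;

      gcry_md_write (hd, "hello", 5);
      digest = gcry_md_read (hd, GCRY_MD_CRC24_RFC2440);    /* 3-byte CRC */
      dlen = gcry_md_get_algo_dlen (GCRY_MD_CRC24_RFC2440);

      for (i = 0; i < dlen; i++)
        printf ("%02x", digest[i]);
      printf ("\n");

      gcry_md_close (hd);
      return 0;
    }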
diff --git a/comm/third_party/libgcrypt/cipher/des-amd64.S b/comm/third_party/libgcrypt/cipher/des-amd64.S
new file mode 100644
index 0000000000..a211dac38a
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/des-amd64.S
@@ -0,0 +1,1111 @@
+/* des-amd64.S - AMD64 assembly implementation of 3DES cipher
+ *
+ * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(USE_DES) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+
+#define s1 0
+#define s2 ((s1) + (64*8))
+#define s3 ((s2) + (64*8))
+#define s4 ((s3) + (64*8))
+#define s5 ((s4) + (64*8))
+#define s6 ((s5) + (64*8))
+#define s7 ((s6) + (64*8))
+#define s8 ((s7) + (64*8))
+
+/* register macros */
+#define CTX %rdi
+#define SBOXES %rbp
+
+#define RL0 %r8
+#define RL1 %r9
+#define RL2 %r10
+
+#define RL0d %r8d
+#define RL1d %r9d
+#define RL2d %r10d
+
+#define RR0 %r11
+#define RR1 %r12
+#define RR2 %r13
+
+#define RR0d %r11d
+#define RR1d %r12d
+#define RR2d %r13d
+
+#define RW0 %rax
+#define RW1 %rbx
+#define RW2 %rcx
+
+#define RW0d %eax
+#define RW1d %ebx
+#define RW2d %ecx
+
+#define RW0bl %al
+#define RW1bl %bl
+#define RW2bl %cl
+
+#define RW0bh %ah
+#define RW1bh %bh
+#define RW2bh %ch
+
+#define RT0 %r15
+#define RT1 %rsi
+#define RT2 %r14
+#define RT3 %rdx
+
+#define RT0d %r15d
+#define RT1d %esi
+#define RT2d %r14d
+#define RT3d %edx
+
+/***********************************************************************
+ * 1-way 3DES
+ ***********************************************************************/
+#define do_permutation(a, b, offset, mask) \
+ movl a, RT0d; \
+ shrl $(offset), RT0d; \
+ xorl b, RT0d; \
+ andl $(mask), RT0d; \
+ xorl RT0d, b; \
+ shll $(offset), RT0d; \
+ xorl RT0d, a;
+
+#define expand_to_64bits(val, mask) \
+ movl val##d, RT0d; \
+ rorl $4, RT0d; \
+ shlq $32, RT0; \
+ orq RT0, val; \
+ andq mask, val;
+
+#define compress_to_64bits(val) \
+ movq val, RT0; \
+ shrq $32, RT0; \
+ roll $4, RT0d; \
+ orl RT0d, val##d;
+
+#define initial_permutation(left, right) \
+ do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \
+ do_permutation(left##d, right##d, 16, 0x0000ffff); \
+ do_permutation(right##d, left##d, 2, 0x33333333); \
+ do_permutation(right##d, left##d, 8, 0x00ff00ff); \
+ movabs $0x3f3f3f3f3f3f3f3f, RT3; \
+ movl left##d, RW0d; \
+ roll $1, right##d; \
+ xorl right##d, RW0d; \
+ andl $0xaaaaaaaa, RW0d; \
+ xorl RW0d, left##d; \
+ xorl RW0d, right##d; \
+ roll $1, left##d; \
+ expand_to_64bits(right, RT3); \
+ expand_to_64bits(left, RT3);
+
+#define final_permutation(left, right) \
+ compress_to_64bits(right); \
+ compress_to_64bits(left); \
+ movl right##d, RW0d; \
+ rorl $1, left##d; \
+ xorl left##d, RW0d; \
+ andl $0xaaaaaaaa, RW0d; \
+ xorl RW0d, right##d; \
+ xorl RW0d, left##d; \
+ rorl $1, right##d; \
+ do_permutation(right##d, left##d, 8, 0x00ff00ff); \
+ do_permutation(right##d, left##d, 2, 0x33333333); \
+ do_permutation(left##d, right##d, 16, 0x0000ffff); \
+ do_permutation(left##d, right##d, 4, 0x0f0f0f0f);
+
+#define round1(n, from, to, load_next_key) \
+ xorq from, RW0; \
+ \
+ movzbl RW0bl, RT0d; \
+ movzbl RW0bh, RT1d; \
+ shrq $16, RW0; \
+ movzbl RW0bl, RT2d; \
+ movzbl RW0bh, RT3d; \
+ shrq $16, RW0; \
+ movq s8(SBOXES, RT0, 8), RT0; \
+ xorq s6(SBOXES, RT1, 8), to; \
+ movzbl RW0bl, RL1d; \
+ movzbl RW0bh, RT1d; \
+ shrl $16, RW0d; \
+ xorq s4(SBOXES, RT2, 8), RT0; \
+ xorq s2(SBOXES, RT3, 8), to; \
+ movzbl RW0bl, RT2d; \
+ movzbl RW0bh, RT3d; \
+ xorq s7(SBOXES, RL1, 8), RT0; \
+ xorq s5(SBOXES, RT1, 8), to; \
+ xorq s3(SBOXES, RT2, 8), RT0; \
+ load_next_key(n, RW0); \
+ xorq RT0, to; \
+ xorq s1(SBOXES, RT3, 8), to; \
+
+#define load_next_key(n, RWx) \
+ movq (((n) + 1) * 8)(CTX), RWx;
+
+#define dummy2(a, b) /*_*/
+
+#define read_block(io, left, right) \
+ movl (io), left##d; \
+ movl 4(io), right##d; \
+ bswapl left##d; \
+ bswapl right##d;
+
+#define write_block(io, left, right) \
+ bswapl left##d; \
+ bswapl right##d; \
+ movl left##d, (io); \
+ movl right##d, 4(io);
+
+.align 8
+.globl _gcry_3des_amd64_crypt_block
+ELF(.type _gcry_3des_amd64_crypt_block,@function;)
+
+_gcry_3des_amd64_crypt_block:
+ /* input:
+ * %rdi: round keys, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %r12;
+ CFI_PUSH(%r12);
+ pushq %r13;
+ CFI_PUSH(%r13);
+ pushq %r14;
+ CFI_PUSH(%r14);
+ pushq %r15;
+ CFI_PUSH(%r15);
+ pushq %rsi; /*dst*/
+ CFI_PUSH(%rsi);
+
+ leaq .L_s1 rRIP, SBOXES;
+
+ read_block(%rdx, RL0, RR0);
+ initial_permutation(RL0, RR0);
+
+ movq (CTX), RW0;
+
+ round1(0, RR0, RL0, load_next_key);
+ round1(1, RL0, RR0, load_next_key);
+ round1(2, RR0, RL0, load_next_key);
+ round1(3, RL0, RR0, load_next_key);
+ round1(4, RR0, RL0, load_next_key);
+ round1(5, RL0, RR0, load_next_key);
+ round1(6, RR0, RL0, load_next_key);
+ round1(7, RL0, RR0, load_next_key);
+ round1(8, RR0, RL0, load_next_key);
+ round1(9, RL0, RR0, load_next_key);
+ round1(10, RR0, RL0, load_next_key);
+ round1(11, RL0, RR0, load_next_key);
+ round1(12, RR0, RL0, load_next_key);
+ round1(13, RL0, RR0, load_next_key);
+ round1(14, RR0, RL0, load_next_key);
+ round1(15, RL0, RR0, load_next_key);
+
+ round1(16+0, RL0, RR0, load_next_key);
+ round1(16+1, RR0, RL0, load_next_key);
+ round1(16+2, RL0, RR0, load_next_key);
+ round1(16+3, RR0, RL0, load_next_key);
+ round1(16+4, RL0, RR0, load_next_key);
+ round1(16+5, RR0, RL0, load_next_key);
+ round1(16+6, RL0, RR0, load_next_key);
+ round1(16+7, RR0, RL0, load_next_key);
+ round1(16+8, RL0, RR0, load_next_key);
+ round1(16+9, RR0, RL0, load_next_key);
+ round1(16+10, RL0, RR0, load_next_key);
+ round1(16+11, RR0, RL0, load_next_key);
+ round1(16+12, RL0, RR0, load_next_key);
+ round1(16+13, RR0, RL0, load_next_key);
+ round1(16+14, RL0, RR0, load_next_key);
+ round1(16+15, RR0, RL0, load_next_key);
+
+ round1(32+0, RR0, RL0, load_next_key);
+ round1(32+1, RL0, RR0, load_next_key);
+ round1(32+2, RR0, RL0, load_next_key);
+ round1(32+3, RL0, RR0, load_next_key);
+ round1(32+4, RR0, RL0, load_next_key);
+ round1(32+5, RL0, RR0, load_next_key);
+ round1(32+6, RR0, RL0, load_next_key);
+ round1(32+7, RL0, RR0, load_next_key);
+ round1(32+8, RR0, RL0, load_next_key);
+ round1(32+9, RL0, RR0, load_next_key);
+ round1(32+10, RR0, RL0, load_next_key);
+ round1(32+11, RL0, RR0, load_next_key);
+ round1(32+12, RR0, RL0, load_next_key);
+ round1(32+13, RL0, RR0, load_next_key);
+ round1(32+14, RR0, RL0, load_next_key);
+ round1(32+15, RL0, RR0, dummy2);
+
+ popq RW2; /*dst*/
+ CFI_POP_TMP_REG();
+ final_permutation(RR0, RL0);
+ write_block(RW2, RR0, RL0);
+
+ popq %r15;
+ CFI_POP(%r15);
+ popq %r14;
+ CFI_POP(%r14);
+ popq %r13;
+ CFI_POP(%r13);
+ popq %r12;
+ CFI_POP(%r12);
+ popq %rbx;
+ CFI_POP(%rbx);
+ popq %rbp;
+ CFI_POP(%rbp);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;)
+
+/***********************************************************************
+ * 3-way 3DES
+ ***********************************************************************/
+#define expand_to_64bits(val, mask) \
+ movl val##d, RT0d; \
+ rorl $4, RT0d; \
+ shlq $32, RT0; \
+ orq RT0, val; \
+ andq mask, val;
+
+#define compress_to_64bits(val) \
+ movq val, RT0; \
+ shrq $32, RT0; \
+ roll $4, RT0d; \
+ orl RT0d, val##d;
+
+#define initial_permutation3(left, right) \
+ do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
+ do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
+ do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
+ do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
+ do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \
+ do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
+ \
+ do_permutation(right##0d, left##0d, 2, 0x33333333); \
+ do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
+ do_permutation(right##1d, left##1d, 2, 0x33333333); \
+ do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
+ do_permutation(right##2d, left##2d, 2, 0x33333333); \
+ do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
+ \
+ movabs $0x3f3f3f3f3f3f3f3f, RT3; \
+ \
+ movl left##0d, RW0d; \
+ roll $1, right##0d; \
+ xorl right##0d, RW0d; \
+ andl $0xaaaaaaaa, RW0d; \
+ xorl RW0d, left##0d; \
+ xorl RW0d, right##0d; \
+ roll $1, left##0d; \
+ expand_to_64bits(right##0, RT3); \
+ expand_to_64bits(left##0, RT3); \
+ movl left##1d, RW1d; \
+ roll $1, right##1d; \
+ xorl right##1d, RW1d; \
+ andl $0xaaaaaaaa, RW1d; \
+ xorl RW1d, left##1d; \
+ xorl RW1d, right##1d; \
+ roll $1, left##1d; \
+ expand_to_64bits(right##1, RT3); \
+ expand_to_64bits(left##1, RT3); \
+ movl left##2d, RW2d; \
+ roll $1, right##2d; \
+ xorl right##2d, RW2d; \
+ andl $0xaaaaaaaa, RW2d; \
+ xorl RW2d, left##2d; \
+ xorl RW2d, right##2d; \
+ roll $1, left##2d; \
+ expand_to_64bits(right##2, RT3); \
+ expand_to_64bits(left##2, RT3);
+
+#define final_permutation3(left, right) \
+ compress_to_64bits(right##0); \
+ compress_to_64bits(left##0); \
+ movl right##0d, RW0d; \
+ rorl $1, left##0d; \
+ xorl left##0d, RW0d; \
+ andl $0xaaaaaaaa, RW0d; \
+ xorl RW0d, right##0d; \
+ xorl RW0d, left##0d; \
+ rorl $1, right##0d; \
+ compress_to_64bits(right##1); \
+ compress_to_64bits(left##1); \
+ movl right##1d, RW1d; \
+ rorl $1, left##1d; \
+ xorl left##1d, RW1d; \
+ andl $0xaaaaaaaa, RW1d; \
+ xorl RW1d, right##1d; \
+ xorl RW1d, left##1d; \
+ rorl $1, right##1d; \
+ compress_to_64bits(right##2); \
+ compress_to_64bits(left##2); \
+ movl right##2d, RW2d; \
+ rorl $1, left##2d; \
+ xorl left##2d, RW2d; \
+ andl $0xaaaaaaaa, RW2d; \
+ xorl RW2d, right##2d; \
+ xorl RW2d, left##2d; \
+ rorl $1, right##2d; \
+ \
+ do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
+ do_permutation(right##0d, left##0d, 2, 0x33333333); \
+ do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
+ do_permutation(right##1d, left##1d, 2, 0x33333333); \
+ do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
+ do_permutation(right##2d, left##2d, 2, 0x33333333); \
+ \
+ do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
+ do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
+ do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
+ do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
+ do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
+ do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f);
+
+#define round3(n, from, to, load_next_key, do_movq) \
+ xorq from##0, RW0; \
+ movzbl RW0bl, RT3d; \
+ movzbl RW0bh, RT1d; \
+ shrq $16, RW0; \
+ xorq s8(SBOXES, RT3, 8), to##0; \
+ xorq s6(SBOXES, RT1, 8), to##0; \
+ movzbl RW0bl, RT3d; \
+ movzbl RW0bh, RT1d; \
+ shrq $16, RW0; \
+ xorq s4(SBOXES, RT3, 8), to##0; \
+ xorq s2(SBOXES, RT1, 8), to##0; \
+ movzbl RW0bl, RT3d; \
+ movzbl RW0bh, RT1d; \
+ shrl $16, RW0d; \
+ xorq s7(SBOXES, RT3, 8), to##0; \
+ xorq s5(SBOXES, RT1, 8), to##0; \
+ movzbl RW0bl, RT3d; \
+ movzbl RW0bh, RT1d; \
+ load_next_key(n, RW0); \
+ xorq s3(SBOXES, RT3, 8), to##0; \
+ xorq s1(SBOXES, RT1, 8), to##0; \
+ xorq from##1, RW1; \
+ movzbl RW1bl, RT3d; \
+ movzbl RW1bh, RT1d; \
+ shrq $16, RW1; \
+ xorq s8(SBOXES, RT3, 8), to##1; \
+ xorq s6(SBOXES, RT1, 8), to##1; \
+ movzbl RW1bl, RT3d; \
+ movzbl RW1bh, RT1d; \
+ shrq $16, RW1; \
+ xorq s4(SBOXES, RT3, 8), to##1; \
+ xorq s2(SBOXES, RT1, 8), to##1; \
+ movzbl RW1bl, RT3d; \
+ movzbl RW1bh, RT1d; \
+ shrl $16, RW1d; \
+ xorq s7(SBOXES, RT3, 8), to##1; \
+ xorq s5(SBOXES, RT1, 8), to##1; \
+ movzbl RW1bl, RT3d; \
+ movzbl RW1bh, RT1d; \
+ do_movq(RW0, RW1); \
+ xorq s3(SBOXES, RT3, 8), to##1; \
+ xorq s1(SBOXES, RT1, 8), to##1; \
+ xorq from##2, RW2; \
+ movzbl RW2bl, RT3d; \
+ movzbl RW2bh, RT1d; \
+ shrq $16, RW2; \
+ xorq s8(SBOXES, RT3, 8), to##2; \
+ xorq s6(SBOXES, RT1, 8), to##2; \
+ movzbl RW2bl, RT3d; \
+ movzbl RW2bh, RT1d; \
+ shrq $16, RW2; \
+ xorq s4(SBOXES, RT3, 8), to##2; \
+ xorq s2(SBOXES, RT1, 8), to##2; \
+ movzbl RW2bl, RT3d; \
+ movzbl RW2bh, RT1d; \
+ shrl $16, RW2d; \
+ xorq s7(SBOXES, RT3, 8), to##2; \
+ xorq s5(SBOXES, RT1, 8), to##2; \
+ movzbl RW2bl, RT3d; \
+ movzbl RW2bh, RT1d; \
+ do_movq(RW0, RW2); \
+ xorq s3(SBOXES, RT3, 8), to##2; \
+ xorq s1(SBOXES, RT1, 8), to##2;
+
+#define __movq(src, dst) \
+ movq src, dst;
+
+#define read_block(io, left, right) \
+ movl (io), left##d; \
+ movl 4(io), right##d; \
+ bswapl left##d; \
+ bswapl right##d;
+
+#define write_block(io, left, right) \
+ bswapl left##d; \
+ bswapl right##d; \
+ movl left##d, (io); \
+ movl right##d, 4(io);
+
+.align 8
+ELF(.type _gcry_3des_amd64_crypt_blk3,@function;)
+_gcry_3des_amd64_crypt_blk3:
+ /* input:
+ * %rdi: round keys, CTX
+ * RL0d, RR0d, RL1d, RR1d, RL2d, RR2d: 3 input blocks
+ * RR0d, RL0d, RR1d, RL1d, RR2d, RL2d: 3 output blocks
+ */
+ CFI_STARTPROC();
+
+ leaq .L_s1 rRIP, SBOXES;
+
+ initial_permutation3(RL, RR);
+
+ movq 0(CTX), RW0;
+ movq RW0, RW1;
+ movq RW0, RW2;
+
+ round3(0, RR, RL, load_next_key, __movq);
+ round3(1, RL, RR, load_next_key, __movq);
+ round3(2, RR, RL, load_next_key, __movq);
+ round3(3, RL, RR, load_next_key, __movq);
+ round3(4, RR, RL, load_next_key, __movq);
+ round3(5, RL, RR, load_next_key, __movq);
+ round3(6, RR, RL, load_next_key, __movq);
+ round3(7, RL, RR, load_next_key, __movq);
+ round3(8, RR, RL, load_next_key, __movq);
+ round3(9, RL, RR, load_next_key, __movq);
+ round3(10, RR, RL, load_next_key, __movq);
+ round3(11, RL, RR, load_next_key, __movq);
+ round3(12, RR, RL, load_next_key, __movq);
+ round3(13, RL, RR, load_next_key, __movq);
+ round3(14, RR, RL, load_next_key, __movq);
+ round3(15, RL, RR, load_next_key, __movq);
+
+ round3(16+0, RL, RR, load_next_key, __movq);
+ round3(16+1, RR, RL, load_next_key, __movq);
+ round3(16+2, RL, RR, load_next_key, __movq);
+ round3(16+3, RR, RL, load_next_key, __movq);
+ round3(16+4, RL, RR, load_next_key, __movq);
+ round3(16+5, RR, RL, load_next_key, __movq);
+ round3(16+6, RL, RR, load_next_key, __movq);
+ round3(16+7, RR, RL, load_next_key, __movq);
+ round3(16+8, RL, RR, load_next_key, __movq);
+ round3(16+9, RR, RL, load_next_key, __movq);
+ round3(16+10, RL, RR, load_next_key, __movq);
+ round3(16+11, RR, RL, load_next_key, __movq);
+ round3(16+12, RL, RR, load_next_key, __movq);
+ round3(16+13, RR, RL, load_next_key, __movq);
+ round3(16+14, RL, RR, load_next_key, __movq);
+ round3(16+15, RR, RL, load_next_key, __movq);
+
+ round3(32+0, RR, RL, load_next_key, __movq);
+ round3(32+1, RL, RR, load_next_key, __movq);
+ round3(32+2, RR, RL, load_next_key, __movq);
+ round3(32+3, RL, RR, load_next_key, __movq);
+ round3(32+4, RR, RL, load_next_key, __movq);
+ round3(32+5, RL, RR, load_next_key, __movq);
+ round3(32+6, RR, RL, load_next_key, __movq);
+ round3(32+7, RL, RR, load_next_key, __movq);
+ round3(32+8, RR, RL, load_next_key, __movq);
+ round3(32+9, RL, RR, load_next_key, __movq);
+ round3(32+10, RR, RL, load_next_key, __movq);
+ round3(32+11, RL, RR, load_next_key, __movq);
+ round3(32+12, RR, RL, load_next_key, __movq);
+ round3(32+13, RL, RR, load_next_key, __movq);
+ round3(32+14, RR, RL, load_next_key, __movq);
+ round3(32+15, RL, RR, dummy2, dummy2);
+
+ final_permutation3(RR, RL);
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;)
+
+.align 8
+.globl _gcry_3des_amd64_cbc_dec
+ELF(.type _gcry_3des_amd64_cbc_dec,@function;)
+_gcry_3des_amd64_cbc_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (3 blocks)
+ * %rdx: src (3 blocks)
+ * %rcx: iv (64bit)
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %r12;
+ CFI_PUSH(%r12);
+ pushq %r13;
+ CFI_PUSH(%r13);
+ pushq %r14;
+ CFI_PUSH(%r14);
+ pushq %r15;
+ CFI_PUSH(%r15);
+
+ pushq %rsi; /*dst*/
+ CFI_PUSH(%rsi);
+ pushq %rdx; /*src*/
+ CFI_PUSH(%rdx);
+ pushq %rcx; /*iv*/
+ CFI_PUSH(%rcx);
+
+ /* load input */
+ movl 0 * 4(%rdx), RL0d;
+ movl 1 * 4(%rdx), RR0d;
+ movl 2 * 4(%rdx), RL1d;
+ movl 3 * 4(%rdx), RR1d;
+ movl 4 * 4(%rdx), RL2d;
+ movl 5 * 4(%rdx), RR2d;
+
+ bswapl RL0d;
+ bswapl RR0d;
+ bswapl RL1d;
+ bswapl RR1d;
+ bswapl RL2d;
+ bswapl RR2d;
+
+ call _gcry_3des_amd64_crypt_blk3;
+
+ popq %rcx; /*iv*/
+ CFI_POP_TMP_REG();
+ popq %rdx; /*src*/
+ CFI_POP_TMP_REG();
+ popq %rsi; /*dst*/
+ CFI_POP_TMP_REG();
+
+ bswapl RR0d;
+ bswapl RL0d;
+ bswapl RR1d;
+ bswapl RL1d;
+ bswapl RR2d;
+ bswapl RL2d;
+
+ movq 2 * 8(%rdx), RT0;
+ xorl 0 * 4(%rcx), RR0d;
+ xorl 1 * 4(%rcx), RL0d;
+ xorl 0 * 4(%rdx), RR1d;
+ xorl 1 * 4(%rdx), RL1d;
+ xorl 2 * 4(%rdx), RR2d;
+ xorl 3 * 4(%rdx), RL2d;
+ movq RT0, (%rcx); /* store new IV */
+
+ movl RR0d, 0 * 4(%rsi);
+ movl RL0d, 1 * 4(%rsi);
+ movl RR1d, 2 * 4(%rsi);
+ movl RL1d, 3 * 4(%rsi);
+ movl RR2d, 4 * 4(%rsi);
+ movl RL2d, 5 * 4(%rsi);
+
+ popq %r15;
+ CFI_POP(%r15);
+ popq %r14;
+ CFI_POP(%r14);
+ popq %r13;
+ CFI_POP(%r13);
+ popq %r12;
+ CFI_POP(%r12);
+ popq %rbx;
+ CFI_POP(%rbx);
+ popq %rbp;
+ CFI_POP(%rbp);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
+
+.align 8
+.globl _gcry_3des_amd64_ctr_enc
+ELF(.type _gcry_3des_amd64_ctr_enc,@function;)
+_gcry_3des_amd64_ctr_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (3 blocks)
+ * %rdx: src (3 blocks)
+ * %rcx: iv (64bit)
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %r12;
+ CFI_PUSH(%r12);
+ pushq %r13;
+ CFI_PUSH(%r13);
+ pushq %r14;
+ CFI_PUSH(%r14);
+ pushq %r15;
+ CFI_PUSH(%r15);
+
+ pushq %rsi; /*dst*/
+ CFI_PUSH(%rsi);
+ pushq %rdx; /*src*/
+ CFI_PUSH(%rdx);
+ movq %rcx, RW2;
+
+ /* load IV and byteswap */
+ movq (RW2), RT0;
+ bswapq RT0;
+ movq RT0, RR0;
+
+ /* construct IVs */
+ leaq 1(RT0), RR1;
+ leaq 2(RT0), RR2;
+ leaq 3(RT0), RT0;
+ movq RR0, RL0;
+ movq RR1, RL1;
+ movq RR2, RL2;
+ bswapq RT0;
+ shrq $32, RL0;
+ shrq $32, RL1;
+ shrq $32, RL2;
+
+ /* store new IV */
+ movq RT0, (RW2);
+
+ call _gcry_3des_amd64_crypt_blk3;
+
+ popq %rdx; /*src*/
+ CFI_POP_TMP_REG();
+ popq %rsi; /*dst*/
+ CFI_POP_TMP_REG();
+
+ bswapl RR0d;
+ bswapl RL0d;
+ bswapl RR1d;
+ bswapl RL1d;
+ bswapl RR2d;
+ bswapl RL2d;
+
+ xorl 0 * 4(%rdx), RR0d;
+ xorl 1 * 4(%rdx), RL0d;
+ xorl 2 * 4(%rdx), RR1d;
+ xorl 3 * 4(%rdx), RL1d;
+ xorl 4 * 4(%rdx), RR2d;
+ xorl 5 * 4(%rdx), RL2d;
+
+ movl RR0d, 0 * 4(%rsi);
+ movl RL0d, 1 * 4(%rsi);
+ movl RR1d, 2 * 4(%rsi);
+ movl RL1d, 3 * 4(%rsi);
+ movl RR2d, 4 * 4(%rsi);
+ movl RL2d, 5 * 4(%rsi);
+
+ popq %r15;
+ CFI_POP(%r15);
+ popq %r14;
+ CFI_POP(%r14);
+ popq %r13;
+ CFI_POP(%r13);
+ popq %r12;
+ CFI_POP(%r12);
+ popq %rbx;
+ CFI_POP(%rbx);
+ popq %rbp;
+ CFI_POP(%rbp);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_3des_amd64_ctr_enc,.-_gcry_3des_amd64_ctr_enc;)
+
+.align 8
+.globl _gcry_3des_amd64_cfb_dec
+ELF(.type _gcry_3des_amd64_cfb_dec,@function;)
+_gcry_3des_amd64_cfb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (3 blocks)
+ * %rdx: src (3 blocks)
+ * %rcx: iv (64bit)
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %r12;
+ CFI_PUSH(%r12);
+ pushq %r13;
+ CFI_PUSH(%r13);
+ pushq %r14;
+ CFI_PUSH(%r14);
+ pushq %r15;
+ CFI_PUSH(%r15);
+
+ pushq %rsi; /*dst*/
+ CFI_PUSH(%rsi);
+ pushq %rdx; /*src*/
+ CFI_PUSH(%rdx);
+ movq %rcx, RW2;
+
+ /* Load input */
+ movl 0 * 4(RW2), RL0d;
+ movl 1 * 4(RW2), RR0d;
+ movl 0 * 4(%rdx), RL1d;
+ movl 1 * 4(%rdx), RR1d;
+ movl 2 * 4(%rdx), RL2d;
+ movl 3 * 4(%rdx), RR2d;
+
+ bswapl RL0d;
+ bswapl RR0d;
+ bswapl RL1d;
+ bswapl RR1d;
+ bswapl RL2d;
+ bswapl RR2d;
+
+ /* Update IV */
+ movq 4 * 4(%rdx), RW0;
+ movq RW0, (RW2);
+
+ call _gcry_3des_amd64_crypt_blk3;
+
+ popq %rdx; /*src*/
+ CFI_POP_TMP_REG();
+ popq %rsi; /*dst*/
+ CFI_POP_TMP_REG();
+
+ bswapl RR0d;
+ bswapl RL0d;
+ bswapl RR1d;
+ bswapl RL1d;
+ bswapl RR2d;
+ bswapl RL2d;
+
+ xorl 0 * 4(%rdx), RR0d;
+ xorl 1 * 4(%rdx), RL0d;
+ xorl 2 * 4(%rdx), RR1d;
+ xorl 3 * 4(%rdx), RL1d;
+ xorl 4 * 4(%rdx), RR2d;
+ xorl 5 * 4(%rdx), RL2d;
+
+ movl RR0d, 0 * 4(%rsi);
+ movl RL0d, 1 * 4(%rsi);
+ movl RR1d, 2 * 4(%rsi);
+ movl RL1d, 3 * 4(%rsi);
+ movl RR2d, 4 * 4(%rsi);
+ movl RL2d, 5 * 4(%rsi);
+
+ popq %r15;
+ CFI_POP(%r15);
+ popq %r14;
+ CFI_POP(%r14);
+ popq %r13;
+ CFI_POP(%r13);
+ popq %r12;
+ CFI_POP(%r12);
+ popq %rbx;
+ CFI_POP(%rbx);
+ popq %rbp;
+ CFI_POP(%rbp);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;)
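The register comments on the four exported routines above map directly onto SysV C prototypes (arguments in %rdi, %rsi, %rdx, %rcx). A hedged sketch of the C-level view, with parameter names chosen here for illustration and possibly spelled differently in des.c:

    /* One 8-byte block. */
    extern void _gcry_3des_amd64_crypt_block (const void *ctx,
                                              unsigned char *dst,
                                              const unsigned char *src);

    /* Three 8-byte blocks per call; iv/ctr point to one 8-byte block. */
    extern void _gcry_3des_amd64_cbc_dec (const void *ctx, unsigned char *dst,
                                          const unsigned char *src,
                                          unsigned char *iv);
    extern void _gcry_3des_amd64_ctr_enc (const void *ctx, unsigned char *dst,
                                          const unsigned char *src,
                                          unsigned char *ctr);
    extern void _gcry_3des_amd64_cfb_dec (const void *ctx, unsigned char *dst,
                                          const unsigned char *src,
                                          unsigned char *iv);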
+
+.align 16
+.L_s1:
+ .quad 0x0010100001010400, 0x0000000000000000
+ .quad 0x0000100000010000, 0x0010100001010404
+ .quad 0x0010100001010004, 0x0000100000010404
+ .quad 0x0000000000000004, 0x0000100000010000
+ .quad 0x0000000000000400, 0x0010100001010400
+ .quad 0x0010100001010404, 0x0000000000000400
+ .quad 0x0010000001000404, 0x0010100001010004
+ .quad 0x0010000001000000, 0x0000000000000004
+ .quad 0x0000000000000404, 0x0010000001000400
+ .quad 0x0010000001000400, 0x0000100000010400
+ .quad 0x0000100000010400, 0x0010100001010000
+ .quad 0x0010100001010000, 0x0010000001000404
+ .quad 0x0000100000010004, 0x0010000001000004
+ .quad 0x0010000001000004, 0x0000100000010004
+ .quad 0x0000000000000000, 0x0000000000000404
+ .quad 0x0000100000010404, 0x0010000001000000
+ .quad 0x0000100000010000, 0x0010100001010404
+ .quad 0x0000000000000004, 0x0010100001010000
+ .quad 0x0010100001010400, 0x0010000001000000
+ .quad 0x0010000001000000, 0x0000000000000400
+ .quad 0x0010100001010004, 0x0000100000010000
+ .quad 0x0000100000010400, 0x0010000001000004
+ .quad 0x0000000000000400, 0x0000000000000004
+ .quad 0x0010000001000404, 0x0000100000010404
+ .quad 0x0010100001010404, 0x0000100000010004
+ .quad 0x0010100001010000, 0x0010000001000404
+ .quad 0x0010000001000004, 0x0000000000000404
+ .quad 0x0000100000010404, 0x0010100001010400
+ .quad 0x0000000000000404, 0x0010000001000400
+ .quad 0x0010000001000400, 0x0000000000000000
+ .quad 0x0000100000010004, 0x0000100000010400
+ .quad 0x0000000000000000, 0x0010100001010004
+.L_s2:
+ .quad 0x0801080200100020, 0x0800080000000000
+ .quad 0x0000080000000000, 0x0001080200100020
+ .quad 0x0001000000100000, 0x0000000200000020
+ .quad 0x0801000200100020, 0x0800080200000020
+ .quad 0x0800000200000020, 0x0801080200100020
+ .quad 0x0801080000100000, 0x0800000000000000
+ .quad 0x0800080000000000, 0x0001000000100000
+ .quad 0x0000000200000020, 0x0801000200100020
+ .quad 0x0001080000100000, 0x0001000200100020
+ .quad 0x0800080200000020, 0x0000000000000000
+ .quad 0x0800000000000000, 0x0000080000000000
+ .quad 0x0001080200100020, 0x0801000000100000
+ .quad 0x0001000200100020, 0x0800000200000020
+ .quad 0x0000000000000000, 0x0001080000100000
+ .quad 0x0000080200000020, 0x0801080000100000
+ .quad 0x0801000000100000, 0x0000080200000020
+ .quad 0x0000000000000000, 0x0001080200100020
+ .quad 0x0801000200100020, 0x0001000000100000
+ .quad 0x0800080200000020, 0x0801000000100000
+ .quad 0x0801080000100000, 0x0000080000000000
+ .quad 0x0801000000100000, 0x0800080000000000
+ .quad 0x0000000200000020, 0x0801080200100020
+ .quad 0x0001080200100020, 0x0000000200000020
+ .quad 0x0000080000000000, 0x0800000000000000
+ .quad 0x0000080200000020, 0x0801080000100000
+ .quad 0x0001000000100000, 0x0800000200000020
+ .quad 0x0001000200100020, 0x0800080200000020
+ .quad 0x0800000200000020, 0x0001000200100020
+ .quad 0x0001080000100000, 0x0000000000000000
+ .quad 0x0800080000000000, 0x0000080200000020
+ .quad 0x0800000000000000, 0x0801000200100020
+ .quad 0x0801080200100020, 0x0001080000100000
+.L_s3:
+ .quad 0x0000002000000208, 0x0000202008020200
+ .quad 0x0000000000000000, 0x0000200008020008
+ .quad 0x0000002008000200, 0x0000000000000000
+ .quad 0x0000202000020208, 0x0000002008000200
+ .quad 0x0000200000020008, 0x0000000008000008
+ .quad 0x0000000008000008, 0x0000200000020000
+ .quad 0x0000202008020208, 0x0000200000020008
+ .quad 0x0000200008020000, 0x0000002000000208
+ .quad 0x0000000008000000, 0x0000000000000008
+ .quad 0x0000202008020200, 0x0000002000000200
+ .quad 0x0000202000020200, 0x0000200008020000
+ .quad 0x0000200008020008, 0x0000202000020208
+ .quad 0x0000002008000208, 0x0000202000020200
+ .quad 0x0000200000020000, 0x0000002008000208
+ .quad 0x0000000000000008, 0x0000202008020208
+ .quad 0x0000002000000200, 0x0000000008000000
+ .quad 0x0000202008020200, 0x0000000008000000
+ .quad 0x0000200000020008, 0x0000002000000208
+ .quad 0x0000200000020000, 0x0000202008020200
+ .quad 0x0000002008000200, 0x0000000000000000
+ .quad 0x0000002000000200, 0x0000200000020008
+ .quad 0x0000202008020208, 0x0000002008000200
+ .quad 0x0000000008000008, 0x0000002000000200
+ .quad 0x0000000000000000, 0x0000200008020008
+ .quad 0x0000002008000208, 0x0000200000020000
+ .quad 0x0000000008000000, 0x0000202008020208
+ .quad 0x0000000000000008, 0x0000202000020208
+ .quad 0x0000202000020200, 0x0000000008000008
+ .quad 0x0000200008020000, 0x0000002008000208
+ .quad 0x0000002000000208, 0x0000200008020000
+ .quad 0x0000202000020208, 0x0000000000000008
+ .quad 0x0000200008020008, 0x0000202000020200
+.L_s4:
+ .quad 0x1008020000002001, 0x1000020800002001
+ .quad 0x1000020800002001, 0x0000000800000000
+ .quad 0x0008020800002000, 0x1008000800000001
+ .quad 0x1008000000000001, 0x1000020000002001
+ .quad 0x0000000000000000, 0x0008020000002000
+ .quad 0x0008020000002000, 0x1008020800002001
+ .quad 0x1000000800000001, 0x0000000000000000
+ .quad 0x0008000800000000, 0x1008000000000001
+ .quad 0x1000000000000001, 0x0000020000002000
+ .quad 0x0008000000000000, 0x1008020000002001
+ .quad 0x0000000800000000, 0x0008000000000000
+ .quad 0x1000020000002001, 0x0000020800002000
+ .quad 0x1008000800000001, 0x1000000000000001
+ .quad 0x0000020800002000, 0x0008000800000000
+ .quad 0x0000020000002000, 0x0008020800002000
+ .quad 0x1008020800002001, 0x1000000800000001
+ .quad 0x0008000800000000, 0x1008000000000001
+ .quad 0x0008020000002000, 0x1008020800002001
+ .quad 0x1000000800000001, 0x0000000000000000
+ .quad 0x0000000000000000, 0x0008020000002000
+ .quad 0x0000020800002000, 0x0008000800000000
+ .quad 0x1008000800000001, 0x1000000000000001
+ .quad 0x1008020000002001, 0x1000020800002001
+ .quad 0x1000020800002001, 0x0000000800000000
+ .quad 0x1008020800002001, 0x1000000800000001
+ .quad 0x1000000000000001, 0x0000020000002000
+ .quad 0x1008000000000001, 0x1000020000002001
+ .quad 0x0008020800002000, 0x1008000800000001
+ .quad 0x1000020000002001, 0x0000020800002000
+ .quad 0x0008000000000000, 0x1008020000002001
+ .quad 0x0000000800000000, 0x0008000000000000
+ .quad 0x0000020000002000, 0x0008020800002000
+.L_s5:
+ .quad 0x0000001000000100, 0x0020001002080100
+ .quad 0x0020000002080000, 0x0420001002000100
+ .quad 0x0000000000080000, 0x0000001000000100
+ .quad 0x0400000000000000, 0x0020000002080000
+ .quad 0x0400001000080100, 0x0000000000080000
+ .quad 0x0020001002000100, 0x0400001000080100
+ .quad 0x0420001002000100, 0x0420000002080000
+ .quad 0x0000001000080100, 0x0400000000000000
+ .quad 0x0020000002000000, 0x0400000000080000
+ .quad 0x0400000000080000, 0x0000000000000000
+ .quad 0x0400001000000100, 0x0420001002080100
+ .quad 0x0420001002080100, 0x0020001002000100
+ .quad 0x0420000002080000, 0x0400001000000100
+ .quad 0x0000000000000000, 0x0420000002000000
+ .quad 0x0020001002080100, 0x0020000002000000
+ .quad 0x0420000002000000, 0x0000001000080100
+ .quad 0x0000000000080000, 0x0420001002000100
+ .quad 0x0000001000000100, 0x0020000002000000
+ .quad 0x0400000000000000, 0x0020000002080000
+ .quad 0x0420001002000100, 0x0400001000080100
+ .quad 0x0020001002000100, 0x0400000000000000
+ .quad 0x0420000002080000, 0x0020001002080100
+ .quad 0x0400001000080100, 0x0000001000000100
+ .quad 0x0020000002000000, 0x0420000002080000
+ .quad 0x0420001002080100, 0x0000001000080100
+ .quad 0x0420000002000000, 0x0420001002080100
+ .quad 0x0020000002080000, 0x0000000000000000
+ .quad 0x0400000000080000, 0x0420000002000000
+ .quad 0x0000001000080100, 0x0020001002000100
+ .quad 0x0400001000000100, 0x0000000000080000
+ .quad 0x0000000000000000, 0x0400000000080000
+ .quad 0x0020001002080100, 0x0400001000000100
+.L_s6:
+ .quad 0x0200000120000010, 0x0204000020000000
+ .quad 0x0000040000000000, 0x0204040120000010
+ .quad 0x0204000020000000, 0x0000000100000010
+ .quad 0x0204040120000010, 0x0004000000000000
+ .quad 0x0200040020000000, 0x0004040100000010
+ .quad 0x0004000000000000, 0x0200000120000010
+ .quad 0x0004000100000010, 0x0200040020000000
+ .quad 0x0200000020000000, 0x0000040100000010
+ .quad 0x0000000000000000, 0x0004000100000010
+ .quad 0x0200040120000010, 0x0000040000000000
+ .quad 0x0004040000000000, 0x0200040120000010
+ .quad 0x0000000100000010, 0x0204000120000010
+ .quad 0x0204000120000010, 0x0000000000000000
+ .quad 0x0004040100000010, 0x0204040020000000
+ .quad 0x0000040100000010, 0x0004040000000000
+ .quad 0x0204040020000000, 0x0200000020000000
+ .quad 0x0200040020000000, 0x0000000100000010
+ .quad 0x0204000120000010, 0x0004040000000000
+ .quad 0x0204040120000010, 0x0004000000000000
+ .quad 0x0000040100000010, 0x0200000120000010
+ .quad 0x0004000000000000, 0x0200040020000000
+ .quad 0x0200000020000000, 0x0000040100000010
+ .quad 0x0200000120000010, 0x0204040120000010
+ .quad 0x0004040000000000, 0x0204000020000000
+ .quad 0x0004040100000010, 0x0204040020000000
+ .quad 0x0000000000000000, 0x0204000120000010
+ .quad 0x0000000100000010, 0x0000040000000000
+ .quad 0x0204000020000000, 0x0004040100000010
+ .quad 0x0000040000000000, 0x0004000100000010
+ .quad 0x0200040120000010, 0x0000000000000000
+ .quad 0x0204040020000000, 0x0200000020000000
+ .quad 0x0004000100000010, 0x0200040120000010
+.L_s7:
+ .quad 0x0002000000200000, 0x2002000004200002
+ .quad 0x2000000004000802, 0x0000000000000000
+ .quad 0x0000000000000800, 0x2000000004000802
+ .quad 0x2002000000200802, 0x0002000004200800
+ .quad 0x2002000004200802, 0x0002000000200000
+ .quad 0x0000000000000000, 0x2000000004000002
+ .quad 0x2000000000000002, 0x0000000004000000
+ .quad 0x2002000004200002, 0x2000000000000802
+ .quad 0x0000000004000800, 0x2002000000200802
+ .quad 0x2002000000200002, 0x0000000004000800
+ .quad 0x2000000004000002, 0x0002000004200000
+ .quad 0x0002000004200800, 0x2002000000200002
+ .quad 0x0002000004200000, 0x0000000000000800
+ .quad 0x2000000000000802, 0x2002000004200802
+ .quad 0x0002000000200800, 0x2000000000000002
+ .quad 0x0000000004000000, 0x0002000000200800
+ .quad 0x0000000004000000, 0x0002000000200800
+ .quad 0x0002000000200000, 0x2000000004000802
+ .quad 0x2000000004000802, 0x2002000004200002
+ .quad 0x2002000004200002, 0x2000000000000002
+ .quad 0x2002000000200002, 0x0000000004000000
+ .quad 0x0000000004000800, 0x0002000000200000
+ .quad 0x0002000004200800, 0x2000000000000802
+ .quad 0x2002000000200802, 0x0002000004200800
+ .quad 0x2000000000000802, 0x2000000004000002
+ .quad 0x2002000004200802, 0x0002000004200000
+ .quad 0x0002000000200800, 0x0000000000000000
+ .quad 0x2000000000000002, 0x2002000004200802
+ .quad 0x0000000000000000, 0x2002000000200802
+ .quad 0x0002000004200000, 0x0000000000000800
+ .quad 0x2000000004000002, 0x0000000004000800
+ .quad 0x0000000000000800, 0x2002000000200002
+.L_s8:
+ .quad 0x0100010410001000, 0x0000010000001000
+ .quad 0x0000000000040000, 0x0100010410041000
+ .quad 0x0100000010000000, 0x0100010410001000
+ .quad 0x0000000400000000, 0x0100000010000000
+ .quad 0x0000000400040000, 0x0100000010040000
+ .quad 0x0100010410041000, 0x0000010000041000
+ .quad 0x0100010010041000, 0x0000010400041000
+ .quad 0x0000010000001000, 0x0000000400000000
+ .quad 0x0100000010040000, 0x0100000410000000
+ .quad 0x0100010010001000, 0x0000010400001000
+ .quad 0x0000010000041000, 0x0000000400040000
+ .quad 0x0100000410040000, 0x0100010010041000
+ .quad 0x0000010400001000, 0x0000000000000000
+ .quad 0x0000000000000000, 0x0100000410040000
+ .quad 0x0100000410000000, 0x0100010010001000
+ .quad 0x0000010400041000, 0x0000000000040000
+ .quad 0x0000010400041000, 0x0000000000040000
+ .quad 0x0100010010041000, 0x0000010000001000
+ .quad 0x0000000400000000, 0x0100000410040000
+ .quad 0x0000010000001000, 0x0000010400041000
+ .quad 0x0100010010001000, 0x0000000400000000
+ .quad 0x0100000410000000, 0x0100000010040000
+ .quad 0x0100000410040000, 0x0100000010000000
+ .quad 0x0000000000040000, 0x0100010410001000
+ .quad 0x0000000000000000, 0x0100010410041000
+ .quad 0x0000000400040000, 0x0100000410000000
+ .quad 0x0100000010040000, 0x0100010010001000
+ .quad 0x0100010410001000, 0x0000000000000000
+ .quad 0x0100010410041000, 0x0000010000041000
+ .quad 0x0000010000041000, 0x0000010400001000
+ .quad 0x0000010400001000, 0x0000000400040000
+ .quad 0x0100000010000000, 0x0100010010041000
+
+#endif
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/des.c b/comm/third_party/libgcrypt/cipher/des.c
new file mode 100644
index 0000000000..1580ea4ec5
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/des.c
@@ -0,0 +1,1507 @@
+/* des.c - DES and Triple-DES encryption/decryption Algorithm
+ * Copyright (C) 1998, 1999, 2001, 2002, 2003,
+ * 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * For a description of triple encryption, see:
+ * Bruce Schneier: Applied Cryptography. Second Edition.
+ * John Wiley & Sons, 1996. ISBN 0-471-12845-7. Pages 358 ff.
+ * This implementation is according to the definition of DES in FIPS
+ * PUB 46-2 from December 1993.
+ */
+
+
+/*
+ * Written by Michael Roth <mroth@nessie.de>, September 1998
+ */
+
+
+/*
+ * U S A G E
+ * ===========
+ *
+ * For DES or Triple-DES encryption/decryption you must initialize a proper
+ * encryption context with a key.
+ *
+ * A DES key is 64bit wide but only 56bits of the key are used. The remaining
+ * bits are parity bits; they are _not_ checked in this implementation, but
+ * simply ignored.
+ *
+ * For Triple-DES you can use either two 64bit keys or three 64bit keys.
+ * The parity bits are _not_ checked here either.
+ *
+ * After initializing a context with a key you could use this context to
+ * encrypt or decrypt data in 64bit blocks in Electronic Codebook Mode.
+ *
+ * (In the examples below the slashes at the beginning and ending of comments
+ * are omitted.)
+ *
+ * DES Example
+ * -----------
+ * unsigned char key[8];
+ * unsigned char plaintext[8];
+ * unsigned char ciphertext[8];
+ * unsigned char recoverd[8];
+ * des_ctx context;
+ *
+ * * Fill 'key' and 'plaintext' with some data *
+ * ....
+ *
+ * * Set up the DES encryption context *
+ * des_setkey(context, key);
+ *
+ * * Encrypt the plaintext *
+ * des_ecb_encrypt(context, plaintext, ciphertext);
+ *
+ * * To recover the original plaintext from ciphertext use: *
+ * des_ecb_decrypt(context, ciphertext, recoverd);
+ *
+ *
+ * Triple-DES Example
+ * ------------------
+ * unsigned char key1[8];
+ * unsigned char key2[8];
+ * unsigned char key3[8];
+ * unsigned char plaintext[8];
+ * unsigned char ciphertext[8];
+ * unsigned char recoverd[8];
+ * tripledes_ctx context;
+ *
+ * * If you would like to use two 64bit keys, fill 'key1' and 'key2'
+ * then set up the encryption context: *
+ * tripledes_set2keys(context, key1, key2);
+ *
+ * * To use three 64bit keys with Triple-DES use: *
+ * tripledes_set3keys(context, key1, key2, key3);
+ *
+ * * Encrypting plaintext with Triple-DES *
+ * tripledes_ecb_encrypt(context, plaintext, ciphertext);
+ *
+ * * Decrypting ciphertext to recover the plaintext with Triple-DES *
+ * tripledes_ecb_decrypt(context, ciphertext, recoverd);
+ *
+ *
+ * Selftest
+ * --------
+ * char *error_msg;
+ *
+ * * To perform a selftest of this DES/Triple-DES implementation use the
+ * function selftest(). It will return an error string if there is
+ * a problem with this library. *
+ *
+ * if ( (error_msg = selftest()) )
+ * {
+ * fprintf(stderr, "An error in the DES/Triple-DES implementation occurred: %s\n", error_msg);
+ * abort();
+ * }
+ */
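The block comment above documents the file-local helpers (des_setkey, des_ecb_encrypt, ...), which are static to this file. Applications normally reach this code through libgcrypt's generic cipher API instead; a minimal sketch of three-key Triple-DES in ECB mode through that interface (the helper name and its reduced error handling are illustrative):

    #include <gcrypt.h>

    /* Encrypt one 8-byte block with 3-key Triple-DES (24-byte key). */
    static int
    tdes_ecb_example (const unsigned char key[24],
                      const unsigned char in[8], unsigned char out[8])
    {
      gcry_cipher_hd_t hd;

      if (gcry_cipher_open (&hd, GCRY_CIPHER_3DES, GCRY_CIPHER_MODE_ECB, 0))
        return -1;
      if (gcry_cipher_setkey (hd, key, 24)
          || gcry_cipher_encrypt (hd, out, 8, in, 8))
        {
          gcry_cipher_close (hd);
          return -1;
        }
      gcry_cipher_close (hd);
      return 0;
    }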
+
+
+#include <config.h>
+#include <stdio.h>
+#include <string.h> /* memcpy, memcmp */
+#include "types.h" /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+
+#define DES_BLOCKSIZE 8
+
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+/* Helper macro to force alignment to 16 bytes. */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_16 __attribute__ ((aligned (16)))
+#else
+# define ATTR_ALIGNED_16
+#endif
+
+#if defined(__GNUC__) && defined(__GNU_LIBRARY__)
+# define working_memcmp memcmp
+#else
+/*
+ * According to the SunOS man page, memcmp returns indeterminate sign
+ * depending on whether characters are signed or not.
+ */
+static int
+working_memcmp( const void *_a, const void *_b, size_t n )
+{
+ const char *a = _a;
+ const char *b = _b;
+ for( ; n; n--, a++, b++ )
+ if( *a != *b )
+ return (int)(*(byte*)a) - (int)(*(byte*)b);
+ return 0;
+}
+#endif
+
+/*
+ * Encryption/Decryption context of DES
+ */
+typedef struct _des_ctx
+ {
+ u32 encrypt_subkeys[32];
+ u32 decrypt_subkeys[32];
+ }
+des_ctx[1];
+
+/*
+ * Encryption/Decryption context of Triple-DES
+ */
+typedef struct _tripledes_ctx
+ {
+ u32 encrypt_subkeys[96];
+ u32 decrypt_subkeys[96];
+ struct {
+ int no_weak_key;
+ } flags;
+ }
+tripledes_ctx[1];
+
+static void des_key_schedule (const byte *, u32 *);
+static int des_setkey (struct _des_ctx *, const byte *);
+static int des_ecb_crypt (struct _des_ctx *, const byte *, byte *, int);
+static int tripledes_set2keys (struct _tripledes_ctx *,
+ const byte *, const byte *);
+static int tripledes_set3keys (struct _tripledes_ctx *,
+ const byte *, const byte *, const byte *);
+static int tripledes_ecb_crypt (struct _tripledes_ctx *,
+ const byte *, byte *, int);
+static int is_weak_key ( const byte *key );
+static const char *selftest (void);
+static unsigned int do_tripledes_encrypt(void *context, byte *outbuf,
+ const byte *inbuf );
+static unsigned int do_tripledes_decrypt(void *context, byte *outbuf,
+ const byte *inbuf );
+static gcry_err_code_t do_tripledes_setkey(void *context, const byte *key,
+ unsigned keylen,
+ cipher_bulk_ops_t *bulk_ops);
+
+static int initialized;
+
+
+
+
+/*
+ * The s-box values are permuted according to the 'primitive function P'
+ * and are rotated one bit to the left.
+ */
+static u32 sbox1[64] =
+{
+ 0x01010400, 0x00000000, 0x00010000, 0x01010404, 0x01010004, 0x00010404, 0x00000004, 0x00010000,
+ 0x00000400, 0x01010400, 0x01010404, 0x00000400, 0x01000404, 0x01010004, 0x01000000, 0x00000004,
+ 0x00000404, 0x01000400, 0x01000400, 0x00010400, 0x00010400, 0x01010000, 0x01010000, 0x01000404,
+ 0x00010004, 0x01000004, 0x01000004, 0x00010004, 0x00000000, 0x00000404, 0x00010404, 0x01000000,
+ 0x00010000, 0x01010404, 0x00000004, 0x01010000, 0x01010400, 0x01000000, 0x01000000, 0x00000400,
+ 0x01010004, 0x00010000, 0x00010400, 0x01000004, 0x00000400, 0x00000004, 0x01000404, 0x00010404,
+ 0x01010404, 0x00010004, 0x01010000, 0x01000404, 0x01000004, 0x00000404, 0x00010404, 0x01010400,
+ 0x00000404, 0x01000400, 0x01000400, 0x00000000, 0x00010004, 0x00010400, 0x00000000, 0x01010004
+};
+
+static u32 sbox2[64] =
+{
+ 0x80108020, 0x80008000, 0x00008000, 0x00108020, 0x00100000, 0x00000020, 0x80100020, 0x80008020,
+ 0x80000020, 0x80108020, 0x80108000, 0x80000000, 0x80008000, 0x00100000, 0x00000020, 0x80100020,
+ 0x00108000, 0x00100020, 0x80008020, 0x00000000, 0x80000000, 0x00008000, 0x00108020, 0x80100000,
+ 0x00100020, 0x80000020, 0x00000000, 0x00108000, 0x00008020, 0x80108000, 0x80100000, 0x00008020,
+ 0x00000000, 0x00108020, 0x80100020, 0x00100000, 0x80008020, 0x80100000, 0x80108000, 0x00008000,
+ 0x80100000, 0x80008000, 0x00000020, 0x80108020, 0x00108020, 0x00000020, 0x00008000, 0x80000000,
+ 0x00008020, 0x80108000, 0x00100000, 0x80000020, 0x00100020, 0x80008020, 0x80000020, 0x00100020,
+ 0x00108000, 0x00000000, 0x80008000, 0x00008020, 0x80000000, 0x80100020, 0x80108020, 0x00108000
+};
+
+static u32 sbox3[64] =
+{
+ 0x00000208, 0x08020200, 0x00000000, 0x08020008, 0x08000200, 0x00000000, 0x00020208, 0x08000200,
+ 0x00020008, 0x08000008, 0x08000008, 0x00020000, 0x08020208, 0x00020008, 0x08020000, 0x00000208,
+ 0x08000000, 0x00000008, 0x08020200, 0x00000200, 0x00020200, 0x08020000, 0x08020008, 0x00020208,
+ 0x08000208, 0x00020200, 0x00020000, 0x08000208, 0x00000008, 0x08020208, 0x00000200, 0x08000000,
+ 0x08020200, 0x08000000, 0x00020008, 0x00000208, 0x00020000, 0x08020200, 0x08000200, 0x00000000,
+ 0x00000200, 0x00020008, 0x08020208, 0x08000200, 0x08000008, 0x00000200, 0x00000000, 0x08020008,
+ 0x08000208, 0x00020000, 0x08000000, 0x08020208, 0x00000008, 0x00020208, 0x00020200, 0x08000008,
+ 0x08020000, 0x08000208, 0x00000208, 0x08020000, 0x00020208, 0x00000008, 0x08020008, 0x00020200
+};
+
+static u32 sbox4[64] =
+{
+ 0x00802001, 0x00002081, 0x00002081, 0x00000080, 0x00802080, 0x00800081, 0x00800001, 0x00002001,
+ 0x00000000, 0x00802000, 0x00802000, 0x00802081, 0x00000081, 0x00000000, 0x00800080, 0x00800001,
+ 0x00000001, 0x00002000, 0x00800000, 0x00802001, 0x00000080, 0x00800000, 0x00002001, 0x00002080,
+ 0x00800081, 0x00000001, 0x00002080, 0x00800080, 0x00002000, 0x00802080, 0x00802081, 0x00000081,
+ 0x00800080, 0x00800001, 0x00802000, 0x00802081, 0x00000081, 0x00000000, 0x00000000, 0x00802000,
+ 0x00002080, 0x00800080, 0x00800081, 0x00000001, 0x00802001, 0x00002081, 0x00002081, 0x00000080,
+ 0x00802081, 0x00000081, 0x00000001, 0x00002000, 0x00800001, 0x00002001, 0x00802080, 0x00800081,
+ 0x00002001, 0x00002080, 0x00800000, 0x00802001, 0x00000080, 0x00800000, 0x00002000, 0x00802080
+};
+
+static u32 sbox5[64] =
+{
+ 0x00000100, 0x02080100, 0x02080000, 0x42000100, 0x00080000, 0x00000100, 0x40000000, 0x02080000,
+ 0x40080100, 0x00080000, 0x02000100, 0x40080100, 0x42000100, 0x42080000, 0x00080100, 0x40000000,
+ 0x02000000, 0x40080000, 0x40080000, 0x00000000, 0x40000100, 0x42080100, 0x42080100, 0x02000100,
+ 0x42080000, 0x40000100, 0x00000000, 0x42000000, 0x02080100, 0x02000000, 0x42000000, 0x00080100,
+ 0x00080000, 0x42000100, 0x00000100, 0x02000000, 0x40000000, 0x02080000, 0x42000100, 0x40080100,
+ 0x02000100, 0x40000000, 0x42080000, 0x02080100, 0x40080100, 0x00000100, 0x02000000, 0x42080000,
+ 0x42080100, 0x00080100, 0x42000000, 0x42080100, 0x02080000, 0x00000000, 0x40080000, 0x42000000,
+ 0x00080100, 0x02000100, 0x40000100, 0x00080000, 0x00000000, 0x40080000, 0x02080100, 0x40000100
+};
+
+static u32 sbox6[64] =
+{
+ 0x20000010, 0x20400000, 0x00004000, 0x20404010, 0x20400000, 0x00000010, 0x20404010, 0x00400000,
+ 0x20004000, 0x00404010, 0x00400000, 0x20000010, 0x00400010, 0x20004000, 0x20000000, 0x00004010,
+ 0x00000000, 0x00400010, 0x20004010, 0x00004000, 0x00404000, 0x20004010, 0x00000010, 0x20400010,
+ 0x20400010, 0x00000000, 0x00404010, 0x20404000, 0x00004010, 0x00404000, 0x20404000, 0x20000000,
+ 0x20004000, 0x00000010, 0x20400010, 0x00404000, 0x20404010, 0x00400000, 0x00004010, 0x20000010,
+ 0x00400000, 0x20004000, 0x20000000, 0x00004010, 0x20000010, 0x20404010, 0x00404000, 0x20400000,
+ 0x00404010, 0x20404000, 0x00000000, 0x20400010, 0x00000010, 0x00004000, 0x20400000, 0x00404010,
+ 0x00004000, 0x00400010, 0x20004010, 0x00000000, 0x20404000, 0x20000000, 0x00400010, 0x20004010
+};
+
+static u32 sbox7[64] =
+{
+ 0x00200000, 0x04200002, 0x04000802, 0x00000000, 0x00000800, 0x04000802, 0x00200802, 0x04200800,
+ 0x04200802, 0x00200000, 0x00000000, 0x04000002, 0x00000002, 0x04000000, 0x04200002, 0x00000802,
+ 0x04000800, 0x00200802, 0x00200002, 0x04000800, 0x04000002, 0x04200000, 0x04200800, 0x00200002,
+ 0x04200000, 0x00000800, 0x00000802, 0x04200802, 0x00200800, 0x00000002, 0x04000000, 0x00200800,
+ 0x04000000, 0x00200800, 0x00200000, 0x04000802, 0x04000802, 0x04200002, 0x04200002, 0x00000002,
+ 0x00200002, 0x04000000, 0x04000800, 0x00200000, 0x04200800, 0x00000802, 0x00200802, 0x04200800,
+ 0x00000802, 0x04000002, 0x04200802, 0x04200000, 0x00200800, 0x00000000, 0x00000002, 0x04200802,
+ 0x00000000, 0x00200802, 0x04200000, 0x00000800, 0x04000002, 0x04000800, 0x00000800, 0x00200002
+};
+
+static u32 sbox8[64] =
+{
+ 0x10001040, 0x00001000, 0x00040000, 0x10041040, 0x10000000, 0x10001040, 0x00000040, 0x10000000,
+ 0x00040040, 0x10040000, 0x10041040, 0x00041000, 0x10041000, 0x00041040, 0x00001000, 0x00000040,
+ 0x10040000, 0x10000040, 0x10001000, 0x00001040, 0x00041000, 0x00040040, 0x10040040, 0x10041000,
+ 0x00001040, 0x00000000, 0x00000000, 0x10040040, 0x10000040, 0x10001000, 0x00041040, 0x00040000,
+ 0x00041040, 0x00040000, 0x10041000, 0x00001000, 0x00000040, 0x10040040, 0x00001000, 0x00041040,
+ 0x10001000, 0x00000040, 0x10000040, 0x10040000, 0x10040040, 0x10000000, 0x00040000, 0x10001040,
+ 0x00000000, 0x10041040, 0x00040040, 0x10000040, 0x10040000, 0x10001000, 0x10001040, 0x00000000,
+ 0x10041040, 0x00041000, 0x00041000, 0x00001040, 0x00001040, 0x00040040, 0x10000000, 0x10041000
+};
+
+
+/*
+ * These two tables are part of the 'permuted choice 1' function.
+ * In this implementation several speed improvements have been made.
+ */
+static u32 leftkey_swap[16] =
+{
+ 0x00000000, 0x00000001, 0x00000100, 0x00000101,
+ 0x00010000, 0x00010001, 0x00010100, 0x00010101,
+ 0x01000000, 0x01000001, 0x01000100, 0x01000101,
+ 0x01010000, 0x01010001, 0x01010100, 0x01010101
+};
+
+static u32 rightkey_swap[16] =
+{
+ 0x00000000, 0x01000000, 0x00010000, 0x01010000,
+ 0x00000100, 0x01000100, 0x00010100, 0x01010100,
+ 0x00000001, 0x01000001, 0x00010001, 0x01010001,
+ 0x00000101, 0x01000101, 0x00010101, 0x01010101,
+};
+
+
+
+/*
+ * Numbers of left shifts per round for encryption subkeys.
+ * To calculate the decryption subkeys we just reverse the
+ * ordering of the calculated encryption subkeys. So there
+ * is no need for a decryption rotate tab.
+ */
+static byte encrypt_rotate_tab[16] =
+{
+ 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1
+};
+
+
+
+/*
+ * Table with weak DES keys sorted in ascending order.
+ * In DES there are 64 known keys which are weak. They are weak
+ * because they produce only one, two or four different
+ * subkeys in the subkey scheduling process.
+ * The keys in this table have all their parity bits cleared.
+ */
+static byte weak_keys[64][8] =
+{
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /*w*/
+ { 0x00, 0x00, 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e },
+ { 0x00, 0x00, 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0 },
+ { 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe },
+ { 0x00, 0x1e, 0x00, 0x1e, 0x00, 0x0e, 0x00, 0x0e }, /*sw*/
+ { 0x00, 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e, 0x00 },
+ { 0x00, 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0, 0xfe },
+ { 0x00, 0x1e, 0xfe, 0xe0, 0x00, 0x0e, 0xfe, 0xf0 },
+ { 0x00, 0xe0, 0x00, 0xe0, 0x00, 0xf0, 0x00, 0xf0 }, /*sw*/
+ { 0x00, 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e, 0xfe },
+ { 0x00, 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0, 0x00 },
+ { 0x00, 0xe0, 0xfe, 0x1e, 0x00, 0xf0, 0xfe, 0x0e },
+ { 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe }, /*sw*/
+ { 0x00, 0xfe, 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0 },
+ { 0x00, 0xfe, 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e },
+ { 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00 },
+ { 0x1e, 0x00, 0x00, 0x1e, 0x0e, 0x00, 0x00, 0x0e },
+ { 0x1e, 0x00, 0x1e, 0x00, 0x0e, 0x00, 0x0e, 0x00 }, /*sw*/
+ { 0x1e, 0x00, 0xe0, 0xfe, 0x0e, 0x00, 0xf0, 0xfe },
+ { 0x1e, 0x00, 0xfe, 0xe0, 0x0e, 0x00, 0xfe, 0xf0 },
+ { 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e, 0x00, 0x00 },
+ { 0x1e, 0x1e, 0x1e, 0x1e, 0x0e, 0x0e, 0x0e, 0x0e }, /*w*/
+ { 0x1e, 0x1e, 0xe0, 0xe0, 0x0e, 0x0e, 0xf0, 0xf0 },
+ { 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e, 0xfe, 0xfe },
+ { 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0, 0x00, 0xfe },
+ { 0x1e, 0xe0, 0x1e, 0xe0, 0x0e, 0xf0, 0x0e, 0xf0 }, /*sw*/
+ { 0x1e, 0xe0, 0xe0, 0x1e, 0x0e, 0xf0, 0xf0, 0x0e },
+ { 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0, 0xfe, 0x00 },
+ { 0x1e, 0xfe, 0x00, 0xe0, 0x0e, 0xfe, 0x00, 0xf0 },
+ { 0x1e, 0xfe, 0x1e, 0xfe, 0x0e, 0xfe, 0x0e, 0xfe }, /*sw*/
+ { 0x1e, 0xfe, 0xe0, 0x00, 0x0e, 0xfe, 0xf0, 0x00 },
+ { 0x1e, 0xfe, 0xfe, 0x1e, 0x0e, 0xfe, 0xfe, 0x0e },
+ { 0xe0, 0x00, 0x00, 0xe0, 0xf0, 0x00, 0x00, 0xf0 },
+ { 0xe0, 0x00, 0x1e, 0xfe, 0xf0, 0x00, 0x0e, 0xfe },
+ { 0xe0, 0x00, 0xe0, 0x00, 0xf0, 0x00, 0xf0, 0x00 }, /*sw*/
+ { 0xe0, 0x00, 0xfe, 0x1e, 0xf0, 0x00, 0xfe, 0x0e },
+ { 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e, 0x00, 0xfe },
+ { 0xe0, 0x1e, 0x1e, 0xe0, 0xf0, 0x0e, 0x0e, 0xf0 },
+ { 0xe0, 0x1e, 0xe0, 0x1e, 0xf0, 0x0e, 0xf0, 0x0e }, /*sw*/
+ { 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e, 0xfe, 0x00 },
+ { 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0, 0x00, 0x00 },
+ { 0xe0, 0xe0, 0x1e, 0x1e, 0xf0, 0xf0, 0x0e, 0x0e },
+ { 0xe0, 0xe0, 0xe0, 0xe0, 0xf0, 0xf0, 0xf0, 0xf0 }, /*w*/
+ { 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0, 0xfe, 0xfe },
+ { 0xe0, 0xfe, 0x00, 0x1e, 0xf0, 0xfe, 0x00, 0x0e },
+ { 0xe0, 0xfe, 0x1e, 0x00, 0xf0, 0xfe, 0x0e, 0x00 },
+ { 0xe0, 0xfe, 0xe0, 0xfe, 0xf0, 0xfe, 0xf0, 0xfe }, /*sw*/
+ { 0xe0, 0xfe, 0xfe, 0xe0, 0xf0, 0xfe, 0xfe, 0xf0 },
+ { 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe },
+ { 0xfe, 0x00, 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0 },
+ { 0xfe, 0x00, 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e },
+ { 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00 }, /*sw*/
+ { 0xfe, 0x1e, 0x00, 0xe0, 0xfe, 0x0e, 0x00, 0xf0 },
+ { 0xfe, 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e, 0xfe },
+ { 0xfe, 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0, 0x00 },
+ { 0xfe, 0x1e, 0xfe, 0x1e, 0xfe, 0x0e, 0xfe, 0x0e }, /*sw*/
+ { 0xfe, 0xe0, 0x00, 0x1e, 0xfe, 0xf0, 0x00, 0x0e },
+ { 0xfe, 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e, 0x00 },
+ { 0xfe, 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0, 0xfe },
+ { 0xfe, 0xe0, 0xfe, 0xe0, 0xfe, 0xf0, 0xfe, 0xf0 }, /*sw*/
+ { 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00 },
+ { 0xfe, 0xfe, 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e },
+ { 0xfe, 0xfe, 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0 },
+ { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe } /*w*/
+};
+static unsigned char weak_keys_chksum[20] = {
+ 0xD0, 0xCF, 0x07, 0x38, 0x93, 0x70, 0x8A, 0x83, 0x7D, 0xD7,
+ 0x8A, 0x36, 0x65, 0x29, 0x6C, 0x1F, 0x7C, 0x3F, 0xD3, 0x41
+};
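A weak-key check against a table like this is typically a binary search after clearing the parity bit of each key byte (the table entries above already have their parity bits cleared). The following sketch shows that lookup pattern using the names defined in this file; it is illustrative, and the real is_weak_key() further below may differ in detail:

    static int
    weak_key_lookup_sketch (const byte *key)
    {
      byte work[8];
      int i, lo = 0, hi = 63;

      for (i = 0; i < 8; i++)
        work[i] = key[i] & 0xfe;        /* drop the parity bit */

      while (lo <= hi)
        {
          int mid = (lo + hi) / 2;
          int cmp = working_memcmp (work, weak_keys[mid], 8);

          if (cmp == 0)
            return 1;                   /* key is in the weak-key table */
          else if (cmp < 0)
            hi = mid - 1;
          else
            lo = mid + 1;
        }
      return 0;
    }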
+
+
+
+/*
+ * Macro to swap bits across two words.
+ */
+#define DO_PERMUTATION(a, temp, b, offset, mask) \
+ temp = ((a>>offset) ^ b) & mask; \
+ b ^= temp; \
+ a ^= temp<<offset;
+
+
+/*
+ * This performs the 'initial permutation' of the data to be encrypted
+ * or decrypted. Additionally the resulting two words are rotated one bit
+ * to the left.
+ */
+#define INITIAL_PERMUTATION(left, temp, right) \
+ DO_PERMUTATION(left, temp, right, 4, 0x0f0f0f0f) \
+ DO_PERMUTATION(left, temp, right, 16, 0x0000ffff) \
+ DO_PERMUTATION(right, temp, left, 2, 0x33333333) \
+ DO_PERMUTATION(right, temp, left, 8, 0x00ff00ff) \
+ right = (right << 1) | (right >> 31); \
+ temp = (left ^ right) & 0xaaaaaaaa; \
+ right ^= temp; \
+ left ^= temp; \
+ left = (left << 1) | (left >> 31);
+
+/*
+ * The 'inverse initial permutation'.
+ */
+#define FINAL_PERMUTATION(left, temp, right) \
+ left = (left << 31) | (left >> 1); \
+ temp = (left ^ right) & 0xaaaaaaaa; \
+ left ^= temp; \
+ right ^= temp; \
+ right = (right << 31) | (right >> 1); \
+ DO_PERMUTATION(right, temp, left, 8, 0x00ff00ff) \
+ DO_PERMUTATION(right, temp, left, 2, 0x33333333) \
+ DO_PERMUTATION(left, temp, right, 16, 0x0000ffff) \
+ DO_PERMUTATION(left, temp, right, 4, 0x0f0f0f0f)
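All of the permutation macros above are built from the same bit-exchange step: DO_PERMUTATION swaps the bits of 'a' selected by (mask << offset) with the bits of 'b' selected by mask. A standalone illustration of that single step (the function name is illustrative):

    /* With offset 4 and mask 0x0f0f0f0f this exchanges the high nibble of
       every byte of *a with the low nibble of the corresponding byte of *b,
       exactly as the first DO_PERMUTATION of INITIAL_PERMUTATION does. */
    static void
    bit_exchange_step (u32 *a, u32 *b, unsigned int offset, u32 mask)
    {
      u32 t = ((*a >> offset) ^ *b) & mask;

      *b ^= t;
      *a ^= t << offset;
    }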
+
+
+/*
+ * A full DES round including 'expansion function', 'sbox substitution'
+ * and 'primitive function P' but without swapping the left and right word.
+ * Please note: The data in 'from' and 'to' is already rotated one bit to
+ * the left, done in the initial permutation.
+ */
+#define DES_ROUND(from, to, work, subkey) \
+ work = from ^ *subkey++; \
+ to ^= sbox8[ work & 0x3f ]; \
+ to ^= sbox6[ (work>>8) & 0x3f ]; \
+ to ^= sbox4[ (work>>16) & 0x3f ]; \
+ to ^= sbox2[ (work>>24) & 0x3f ]; \
+ work = ((from << 28) | (from >> 4)) ^ *subkey++; \
+ to ^= sbox7[ work & 0x3f ]; \
+ to ^= sbox5[ (work>>8) & 0x3f ]; \
+ to ^= sbox3[ (work>>16) & 0x3f ]; \
+ to ^= sbox1[ (work>>24) & 0x3f ];
+
+/*
+ * Macros to convert 8 bytes from/to 32bit words.
+ */
+#define READ_64BIT_DATA(data, left, right) \
+ left = buf_get_be32(data + 0); \
+ right = buf_get_be32(data + 4);
+
+#define WRITE_64BIT_DATA(data, left, right) \
+ buf_put_be32(data + 0, left); \
+ buf_put_be32(data + 4, right);
+
+/*
+ * Handy macros for encryption and decryption of data
+ */
+#define des_ecb_encrypt(ctx, from, to) des_ecb_crypt(ctx, from, to, 0)
+#define des_ecb_decrypt(ctx, from, to) des_ecb_crypt(ctx, from, to, 1)
+#define tripledes_ecb_encrypt(ctx, from, to) tripledes_ecb_crypt(ctx,from,to,0)
+#define tripledes_ecb_decrypt(ctx, from, to) tripledes_ecb_crypt(ctx,from,to,1)
+
+
+
+
+
+
+/*
+ * des_key_schedule(): Calculate 16 subkey pairs (even/odd) for
+ * 16 encryption rounds.
+ * To calculate subkeys for decryption the caller
+ * has to reorder the generated subkeys.
+ *
+ * rawkey: 8 Bytes of key data
+ * subkey: Array of at least 32 u32s. Will be filled
+ * with calculated subkeys.
+ *
+ */
+static void
+des_key_schedule (const byte * rawkey, u32 * subkey)
+{
+ u32 left, right, work;
+ int round;
+
+ READ_64BIT_DATA (rawkey, left, right)
+
+ DO_PERMUTATION (right, work, left, 4, 0x0f0f0f0f)
+ DO_PERMUTATION (right, work, left, 0, 0x10101010)
+
+ left = ((leftkey_swap[(left >> 0) & 0xf] << 3)
+ | (leftkey_swap[(left >> 8) & 0xf] << 2)
+ | (leftkey_swap[(left >> 16) & 0xf] << 1)
+ | (leftkey_swap[(left >> 24) & 0xf])
+ | (leftkey_swap[(left >> 5) & 0xf] << 7)
+ | (leftkey_swap[(left >> 13) & 0xf] << 6)
+ | (leftkey_swap[(left >> 21) & 0xf] << 5)
+ | (leftkey_swap[(left >> 29) & 0xf] << 4));
+
+ left &= 0x0fffffff;
+
+ right = ((rightkey_swap[(right >> 1) & 0xf] << 3)
+ | (rightkey_swap[(right >> 9) & 0xf] << 2)
+ | (rightkey_swap[(right >> 17) & 0xf] << 1)
+ | (rightkey_swap[(right >> 25) & 0xf])
+ | (rightkey_swap[(right >> 4) & 0xf] << 7)
+ | (rightkey_swap[(right >> 12) & 0xf] << 6)
+ | (rightkey_swap[(right >> 20) & 0xf] << 5)
+ | (rightkey_swap[(right >> 28) & 0xf] << 4));
+
+ right &= 0x0fffffff;
+
+ for (round = 0; round < 16; ++round)
+ {
+ left = ((left << encrypt_rotate_tab[round])
+ | (left >> (28 - encrypt_rotate_tab[round]))) & 0x0fffffff;
+ right = ((right << encrypt_rotate_tab[round])
+ | (right >> (28 - encrypt_rotate_tab[round]))) & 0x0fffffff;
+
+ *subkey++ = (((left << 4) & 0x24000000)
+ | ((left << 28) & 0x10000000)
+ | ((left << 14) & 0x08000000)
+ | ((left << 18) & 0x02080000)
+ | ((left << 6) & 0x01000000)
+ | ((left << 9) & 0x00200000)
+ | ((left >> 1) & 0x00100000)
+ | ((left << 10) & 0x00040000)
+ | ((left << 2) & 0x00020000)
+ | ((left >> 10) & 0x00010000)
+ | ((right >> 13) & 0x00002000)
+ | ((right >> 4) & 0x00001000)
+ | ((right << 6) & 0x00000800)
+ | ((right >> 1) & 0x00000400)
+ | ((right >> 14) & 0x00000200)
+ | (right & 0x00000100)
+ | ((right >> 5) & 0x00000020)
+ | ((right >> 10) & 0x00000010)
+ | ((right >> 3) & 0x00000008)
+ | ((right >> 18) & 0x00000004)
+ | ((right >> 26) & 0x00000002)
+ | ((right >> 24) & 0x00000001));
+
+ *subkey++ = (((left << 15) & 0x20000000)
+ | ((left << 17) & 0x10000000)
+ | ((left << 10) & 0x08000000)
+ | ((left << 22) & 0x04000000)
+ | ((left >> 2) & 0x02000000)
+ | ((left << 1) & 0x01000000)
+ | ((left << 16) & 0x00200000)
+ | ((left << 11) & 0x00100000)
+ | ((left << 3) & 0x00080000)
+ | ((left >> 6) & 0x00040000)
+ | ((left << 15) & 0x00020000)
+ | ((left >> 4) & 0x00010000)
+ | ((right >> 2) & 0x00002000)
+ | ((right << 8) & 0x00001000)
+ | ((right >> 14) & 0x00000808)
+ | ((right >> 9) & 0x00000400)
+ | ((right) & 0x00000200)
+ | ((right << 7) & 0x00000100)
+ | ((right >> 7) & 0x00000020)
+ | ((right >> 3) & 0x00000011)
+ | ((right << 2) & 0x00000004)
+ | ((right >> 21) & 0x00000002));
+ }
+}
+
+
+/*
+ * Fill a DES context with subkeys calculated from a 64bit key.
+ * Does not check the parity bits, but simply ignores them.
+ * Does not check for weak keys.
+ */
+static int
+des_setkey (struct _des_ctx *ctx, const byte * key)
+{
+ static const char *selftest_failed;
+ int i;
+
+ if (!fips_mode () && !initialized)
+ {
+ initialized = 1;
+ selftest_failed = selftest ();
+
+ if (selftest_failed)
+ log_error ("%s\n", selftest_failed);
+ }
+ if (selftest_failed)
+ return GPG_ERR_SELFTEST_FAILED;
+
+ des_key_schedule (key, ctx->encrypt_subkeys);
+ _gcry_burn_stack (32);
+
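+  /* Decryption uses the same subkeys in reverse round order: copy the
+     (even,odd) pairs so that pair 15 of the encryption schedule
+     becomes pair 0 of the decryption schedule, and so on. */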
+ for(i=0; i<32; i+=2)
+ {
+ ctx->decrypt_subkeys[i] = ctx->encrypt_subkeys[30-i];
+ ctx->decrypt_subkeys[i+1] = ctx->encrypt_subkeys[31-i];
+ }
+
+ return 0;
+}
+
+
+
+/*
+ * Electronic Codebook Mode DES encryption/decryption of data according
+ * to 'mode'.
+ */
+static int
+des_ecb_crypt (struct _des_ctx *ctx, const byte * from, byte * to, int mode)
+{
+ u32 left, right, work;
+ u32 *keys;
+
+ keys = mode ? ctx->decrypt_subkeys : ctx->encrypt_subkeys;
+
+ READ_64BIT_DATA (from, left, right)
+ INITIAL_PERMUTATION (left, work, right)
+
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+
+ FINAL_PERMUTATION (right, work, left)
+ WRITE_64BIT_DATA (to, right, left)
+
+ return 0;
+}
+
+
+
+/*
+ * Fill a Triple-DES context with subkeys calculated from two 64bit keys.
+ * Does not check the parity bits of the keys, but simply ignores them.
+ * Does not check for weak keys.
+ */
+static int
+tripledes_set2keys (struct _tripledes_ctx *ctx,
+ const byte * key1,
+ const byte * key2)
+{
+ int i;
+
+ des_key_schedule (key1, ctx->encrypt_subkeys);
+ des_key_schedule (key2, &(ctx->decrypt_subkeys[32]));
+ _gcry_burn_stack (32);
+
+ for(i=0; i<32; i+=2)
+ {
+ ctx->decrypt_subkeys[i] = ctx->encrypt_subkeys[30-i];
+ ctx->decrypt_subkeys[i+1] = ctx->encrypt_subkeys[31-i];
+
+ ctx->encrypt_subkeys[i+32] = ctx->decrypt_subkeys[62-i];
+ ctx->encrypt_subkeys[i+33] = ctx->decrypt_subkeys[63-i];
+
+ ctx->encrypt_subkeys[i+64] = ctx->encrypt_subkeys[i];
+ ctx->encrypt_subkeys[i+65] = ctx->encrypt_subkeys[i+1];
+
+ ctx->decrypt_subkeys[i+64] = ctx->decrypt_subkeys[i];
+ ctx->decrypt_subkeys[i+65] = ctx->decrypt_subkeys[i+1];
+ }
+
+ return 0;
+}
+
+
+
+/*
+ * Fill a Triple-DES context with subkeys calculated from three 64bit keys.
+ * Does not check the parity bits of the keys, but simply ignores them.
+ * Does not check for weak keys.
+ */
+static int
+tripledes_set3keys (struct _tripledes_ctx *ctx,
+ const byte * key1,
+ const byte * key2,
+ const byte * key3)
+{
+ static const char *selftest_failed;
+ int i;
+
+ if (!fips_mode () && !initialized)
+ {
+ initialized = 1;
+ selftest_failed = selftest ();
+
+ if (selftest_failed)
+ log_error ("%s\n", selftest_failed);
+ }
+ if (selftest_failed)
+ return GPG_ERR_SELFTEST_FAILED;
+
+ des_key_schedule (key1, ctx->encrypt_subkeys);
+ des_key_schedule (key2, &(ctx->decrypt_subkeys[32]));
+ des_key_schedule (key3, &(ctx->encrypt_subkeys[64]));
+ _gcry_burn_stack (32);
+
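+  /* Resulting layout: encrypt_subkeys[0..31] is the key1 schedule,
+     [32..63] the reversed key2 schedule (a DES decryption pass) and
+     [64..95] the key3 schedule, i.e. the usual EDE chain
+     E_K3(D_K2(E_K1(x))); decrypt_subkeys holds the three passes in
+     the inverse order and direction. */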
+ for(i=0; i<32; i+=2)
+ {
+ ctx->decrypt_subkeys[i] = ctx->encrypt_subkeys[94-i];
+ ctx->decrypt_subkeys[i+1] = ctx->encrypt_subkeys[95-i];
+
+ ctx->encrypt_subkeys[i+32] = ctx->decrypt_subkeys[62-i];
+ ctx->encrypt_subkeys[i+33] = ctx->decrypt_subkeys[63-i];
+
+ ctx->decrypt_subkeys[i+64] = ctx->encrypt_subkeys[30-i];
+ ctx->decrypt_subkeys[i+65] = ctx->encrypt_subkeys[31-i];
+ }
+
+ return 0;
+}
+
+
+
+#ifdef USE_AMD64_ASM
+
+/* Assembly implementation of triple-DES. */
+extern void _gcry_3des_amd64_crypt_block(const void *keys, byte *out,
+ const byte *in);
+
+/* These assembly implementations process three blocks in parallel. */
+extern void _gcry_3des_amd64_ctr_enc(const void *keys, byte *out,
+ const byte *in, byte *ctr);
+
+extern void _gcry_3des_amd64_cbc_dec(const void *keys, byte *out,
+ const byte *in, byte *iv);
+
+extern void _gcry_3des_amd64_cfb_dec(const void *keys, byte *out,
+ const byte *in, byte *iv);
+
+#define TRIPLEDES_ECB_BURN_STACK (8 * sizeof(void *))
+
+
+/*
+ * Electronic Codebook Mode Triple-DES encryption/decryption of data
+ * according to 'mode'. Sometimes this mode is named 'EDE' mode
+ * (Encryption-Decryption-Encryption).
+ */
+static inline int
+tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from,
+ byte * to, int mode)
+{
+ u32 *keys;
+
+ keys = mode ? ctx->decrypt_subkeys : ctx->encrypt_subkeys;
+
+ _gcry_3des_amd64_crypt_block(keys, to, from);
+
+ return 0;
+}
+
+static inline void
+tripledes_amd64_ctr_enc(const void *keys, byte *out, const byte *in, byte *ctr)
+{
+ _gcry_3des_amd64_ctr_enc(keys, out, in, ctr);
+}
+
+static inline void
+tripledes_amd64_cbc_dec(const void *keys, byte *out, const byte *in, byte *iv)
+{
+ _gcry_3des_amd64_cbc_dec(keys, out, in, iv);
+}
+
+static inline void
+tripledes_amd64_cfb_dec(const void *keys, byte *out, const byte *in, byte *iv)
+{
+ _gcry_3des_amd64_cfb_dec(keys, out, in, iv);
+}
+
+#else /*USE_AMD64_ASM*/
+
+#define TRIPLEDES_ECB_BURN_STACK 32
+
+/*
+ * Electronic Codebook Mode Triple-DES encryption/decryption of data
+ * according to 'mode'. Sometimes this mode is named 'EDE' mode
+ * (Encryption-Decryption-Encryption).
+ */
+static int
+tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from,
+ byte * to, int mode)
+{
+ u32 left, right, work;
+ u32 *keys;
+
+ keys = mode ? ctx->decrypt_subkeys : ctx->encrypt_subkeys;
+
+ READ_64BIT_DATA (from, left, right)
+ INITIAL_PERMUTATION (left, work, right)
+
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+
+ DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys)
+ DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys)
+ DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys)
+ DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys)
+ DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys)
+ DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys)
+ DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys)
+ DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys)
+
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+ DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys)
+
+ FINAL_PERMUTATION (right, work, left)
+ WRITE_64BIT_DATA (to, right, left)
+
+ return 0;
+}
+
+#endif /*!USE_AMD64_ASM*/
+
+
+
+/* Bulk encryption of complete blocks in CTR mode. This function is only
+ intended for the bulk encryption feature of cipher.c. CTR is expected to be
+ of size DES_BLOCKSIZE. */
+static void
+_gcry_3des_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ struct _tripledes_ctx *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char tmpbuf[DES_BLOCKSIZE];
+ int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
+
+#ifdef USE_AMD64_ASM
+ {
+ int asm_burn_depth = 9 * sizeof(void *);
+
+ if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
+ burn_stack_depth = asm_burn_depth;
+
+ /* Process data in 3 block chunks. */
+ while (nblocks >= 3)
+ {
+ tripledes_amd64_ctr_enc(ctx->encrypt_subkeys, outbuf, inbuf, ctr);
+
+ nblocks -= 3;
+ outbuf += 3 * DES_BLOCKSIZE;
+ inbuf += 3 * DES_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* Encrypt the counter. */
+ tripledes_ecb_encrypt (ctx, ctr, tmpbuf);
+ /* XOR the input with the encrypted counter and store in output. */
+ cipher_block_xor(outbuf, tmpbuf, inbuf, DES_BLOCKSIZE);
+ outbuf += DES_BLOCKSIZE;
+ inbuf += DES_BLOCKSIZE;
+ /* Increment the counter. */
+ cipher_block_add(ctr, 1, DES_BLOCKSIZE);
+ }
+
+ wipememory(tmpbuf, sizeof(tmpbuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CBC mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_3des_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ struct _tripledes_ctx *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char savebuf[DES_BLOCKSIZE];
+ int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
+
+#ifdef USE_AMD64_ASM
+ {
+ int asm_burn_depth = 10 * sizeof(void *);
+
+ if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
+ burn_stack_depth = asm_burn_depth;
+
+ /* Process data in 3 block chunks. */
+ while (nblocks >= 3)
+ {
+ tripledes_amd64_cbc_dec(ctx->decrypt_subkeys, outbuf, inbuf, iv);
+
+ nblocks -= 3;
+ outbuf += 3 * DES_BLOCKSIZE;
+ inbuf += 3 * DES_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* INBUF is needed later and it may be identical to OUTBUF, so store
+ the intermediate result to SAVEBUF. */
+ tripledes_ecb_decrypt (ctx, inbuf, savebuf);
+
+ cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, DES_BLOCKSIZE);
+ inbuf += DES_BLOCKSIZE;
+ outbuf += DES_BLOCKSIZE;
+ }
+
+ wipememory(savebuf, sizeof(savebuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CFB mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_3des_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ struct _tripledes_ctx *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
+
+#ifdef USE_AMD64_ASM
+ {
+ int asm_burn_depth = 9 * sizeof(void *);
+
+ if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
+ burn_stack_depth = asm_burn_depth;
+
+ /* Process data in 3 block chunks. */
+ while (nblocks >= 3)
+ {
+ tripledes_amd64_cfb_dec(ctx->encrypt_subkeys, outbuf, inbuf, iv);
+
+ nblocks -= 3;
+ outbuf += 3 * DES_BLOCKSIZE;
+ inbuf += 3 * DES_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ tripledes_ecb_encrypt (ctx, iv, iv);
+ cipher_block_xor_n_copy(outbuf, iv, inbuf, DES_BLOCKSIZE);
+ outbuf += DES_BLOCKSIZE;
+ inbuf += DES_BLOCKSIZE;
+ }
+
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/*
+ * Check whether the 8 byte key is weak.
+ * Does not check the parity bits of the key but simply ignores them.
+ */
+static int
+is_weak_key ( const byte *key )
+{
+ byte work[8];
+ int i, left, right, middle, cmp_result;
+
+ /* clear parity bits */
+ for(i=0; i<8; ++i)
+ work[i] = key[i] & 0xfe;
+
+  /* Binary search in the sorted table of 64 weak keys. */
+ left = 0;
+ right = 63;
+ while(left <= right)
+ {
+ middle = (left + right) / 2;
+
+ if ( !(cmp_result=working_memcmp(work, weak_keys[middle], 8)) )
+ return -1;
+
+ if ( cmp_result > 0 )
+ left = middle + 1;
+ else
+ right = middle - 1;
+ }
+
+ return 0;
+}
+
+
+/* Alternative setkey for selftests; needs a larger key than the default. */
+static gcry_err_code_t
+bulk_selftest_setkey (void *context, const byte *__key, unsigned __keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ static const unsigned char key[24] ATTR_ALIGNED_16 = {
+ 0x66,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
+ 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x22,
+ 0x18,0x2A,0x39,0x47,0x5E,0x6F,0x75,0x82
+ };
+
+ (void)__key;
+ (void)__keylen;
+
+ return do_tripledes_setkey(context, key, sizeof(key), bulk_ops);
+}
+
+
+/* Run the self-tests for DES-CTR, tests IV increment of bulk CTR
+ encryption. Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+ const int nblocks = 3+1;
+ const int blocksize = DES_BLOCKSIZE;
+ const int context_size = sizeof(struct _tripledes_ctx);
+
+ return _gcry_selftest_helper_ctr("3DES", &bulk_selftest_setkey,
+ &do_tripledes_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for DES-CBC, tests bulk CBC decryption.
+ Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+ const int nblocks = 3+2;
+ const int blocksize = DES_BLOCKSIZE;
+ const int context_size = sizeof(struct _tripledes_ctx);
+
+ return _gcry_selftest_helper_cbc("3DES", &bulk_selftest_setkey,
+ &do_tripledes_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for DES-CFB, tests bulk CFB decryption.
+ Returns NULL on success. */
+static const char *
+selftest_cfb (void)
+{
+ const int nblocks = 3+2;
+ const int blocksize = DES_BLOCKSIZE;
+ const int context_size = sizeof(struct _tripledes_ctx);
+
+ return _gcry_selftest_helper_cfb("3DES", &bulk_selftest_setkey,
+ &do_tripledes_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/*
+ * Performs a selftest of this DES/Triple-DES implementation.
+ * Returns a string with the error text on failure.
+ * Returns NULL if all is ok.
+ */
+static const char *
+selftest (void)
+{
+ const char *r;
+
+ /*
+ * Check if 'u32' is really 32 bits wide. This DES / 3DES implementation
+   * needs this.
+ */
+ if (sizeof (u32) != 4)
+ return "Wrong word size for DES configured.";
+
+ /*
+ * DES Maintenance Test
+ */
+ {
+ int i;
+ byte key[8] =
+ {0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55};
+ byte input[8] =
+ {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ byte result[8] =
+ {0x24, 0x6e, 0x9d, 0xb9, 0xc5, 0x50, 0x38, 0x1a};
+ byte temp1[8], temp2[8], temp3[8];
+ des_ctx des;
+
+ for (i = 0; i < 64; ++i)
+ {
+ des_setkey (des, key);
+ des_ecb_encrypt (des, input, temp1);
+ des_ecb_encrypt (des, temp1, temp2);
+ des_setkey (des, temp2);
+ des_ecb_decrypt (des, temp1, temp3);
+ memcpy (key, temp3, 8);
+ memcpy (input, temp1, 8);
+ }
+ if (memcmp (temp3, result, 8))
+ return "DES maintenance test failed.";
+ }
+
+
+ /*
+   * Self-made Triple-DES test (does somebody know an official test?)
+ */
+ {
+ int i;
+ byte input[8] =
+ {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10};
+ byte key1[8] =
+ {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0};
+ byte key2[8] =
+ {0x11, 0x22, 0x33, 0x44, 0xff, 0xaa, 0xcc, 0xdd};
+ byte result[8] =
+ {0x7b, 0x38, 0x3b, 0x23, 0xa2, 0x7d, 0x26, 0xd3};
+
+ tripledes_ctx des3;
+
+ for (i = 0; i < 16; ++i)
+ {
+ tripledes_set2keys (des3, key1, key2);
+ tripledes_ecb_encrypt (des3, input, key1);
+ tripledes_ecb_decrypt (des3, input, key2);
+ tripledes_set3keys (des3, key1, input, key2);
+ tripledes_ecb_encrypt (des3, input, input);
+ }
+ if (memcmp (input, result, 8))
+ return "Triple-DES test failed.";
+ }
+
+ /*
+   * More Triple-DES tests.  These are test vectors as used by SSLeay,
+ * thanks to Jeroen C. van Gelderen.
+ */
+ {
+ static const struct { byte key[24]; byte plain[8]; byte cipher[8]; }
+ testdata[] = {
+ { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+ 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+ 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01 },
+ { 0x95,0xF8,0xA5,0xE5,0xDD,0x31,0xD9,0x00 },
+ { 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00 }
+ },
+
+ { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+ 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+ 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01 },
+ { 0x9D,0x64,0x55,0x5A,0x9A,0x10,0xB8,0x52, },
+ { 0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00 }
+ },
+ { { 0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E,
+ 0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E,
+ 0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E },
+ { 0x51,0x45,0x4B,0x58,0x2D,0xDF,0x44,0x0A },
+ { 0x71,0x78,0x87,0x6E,0x01,0xF1,0x9B,0x2A }
+ },
+ { { 0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6,
+ 0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6,
+ 0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6 },
+ { 0x42,0xFD,0x44,0x30,0x59,0x57,0x7F,0xA2 },
+ { 0xAF,0x37,0xFB,0x42,0x1F,0x8C,0x40,0x95 }
+ },
+ { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,
+ 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,
+ 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF },
+ { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61 },
+ { 0x3D,0x12,0x4F,0xE2,0x19,0x8B,0xA3,0x18 }
+ },
+ { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,
+ 0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,
+ 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF },
+ { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61 },
+ { 0xFB,0xAB,0xA1,0xFF,0x9D,0x05,0xE9,0xB1 }
+ },
+ { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,
+ 0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,
+ 0xFE,0xDC,0xBA,0x98,0x76,0x54,0x32,0x10 },
+ { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61 },
+ { 0x18,0xd7,0x48,0xe5,0x63,0x62,0x05,0x72 }
+ },
+ { { 0x03,0x52,0x02,0x07,0x67,0x20,0x82,0x17,
+ 0x86,0x02,0x87,0x66,0x59,0x08,0x21,0x98,
+ 0x64,0x05,0x6A,0xBD,0xFE,0xA9,0x34,0x57 },
+ { 0x73,0x71,0x75,0x69,0x67,0x67,0x6C,0x65 },
+ { 0xc0,0x7d,0x2a,0x0f,0xa5,0x66,0xfa,0x30 }
+ },
+ { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+ 0x80,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+ 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x02 },
+ { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },
+ { 0xe6,0xe6,0xdd,0x5b,0x7e,0x72,0x29,0x74 }
+ },
+ { { 0x10,0x46,0x10,0x34,0x89,0x98,0x80,0x20,
+ 0x91,0x07,0xD0,0x15,0x89,0x19,0x01,0x01,
+ 0x19,0x07,0x92,0x10,0x98,0x1A,0x01,0x01 },
+ { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },
+ { 0xe1,0xef,0x62,0xc3,0x32,0xfe,0x82,0x5b }
+ }
+ };
+
+ byte result[8];
+ int i;
+ tripledes_ctx des3;
+
+ for (i=0; i<sizeof(testdata)/sizeof(*testdata); ++i)
+ {
+ tripledes_set3keys (des3, testdata[i].key,
+ testdata[i].key + 8, testdata[i].key + 16);
+
+ tripledes_ecb_encrypt (des3, testdata[i].plain, result);
+ if (memcmp (testdata[i].cipher, result, 8))
+ return "Triple-DES SSLeay test failed on encryption.";
+
+ tripledes_ecb_decrypt (des3, testdata[i].cipher, result);
+ if (memcmp (testdata[i].plain, result, 8))
+          return "Triple-DES SSLeay test failed on decryption.";
+ }
+ }
+
+ /*
+ * Check the weak key detection. We simply assume that the table
+ * with weak keys is ok and check every key in the table if it is
+ * detected... (This test is a little bit stupid).
+ */
+ {
+ int i;
+ unsigned char *p;
+ gcry_md_hd_t h;
+
+ if (_gcry_md_open (&h, GCRY_MD_SHA1, 0))
+ return "SHA1 not available";
+
+ for (i = 0; i < 64; ++i)
+ _gcry_md_write (h, weak_keys[i], 8);
+ p = _gcry_md_read (h, GCRY_MD_SHA1);
+ i = memcmp (p, weak_keys_chksum, 20);
+ _gcry_md_close (h);
+ if (i)
+ return "weak key table defect";
+
+ for (i = 0; i < 64; ++i)
+ if (!is_weak_key(weak_keys[i]))
+ return "DES weak key detection failed";
+ }
+
+ if ( (r = selftest_cbc ()) )
+ return r;
+
+ if ( (r = selftest_cfb ()) )
+ return r;
+
+ if ( (r = selftest_ctr ()) )
+ return r;
+
+ return 0;
+}
+
+
+static gcry_err_code_t
+do_tripledes_setkey ( void *context, const byte *key, unsigned keylen,
+ cipher_bulk_ops_t *bulk_ops )
+{
+ struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context;
+
+ if( keylen != 24 )
+ return GPG_ERR_INV_KEYLEN;
+
+ /* Setup bulk encryption routines. */
+ memset (bulk_ops, 0, sizeof(*bulk_ops));
+ bulk_ops->cbc_dec = _gcry_3des_cbc_dec;
+ bulk_ops->cfb_dec = _gcry_3des_cfb_dec;
+ bulk_ops->ctr_enc = _gcry_3des_ctr_enc;
+
+ tripledes_set3keys ( ctx, key, key+8, key+16);
+
+ if (ctx->flags.no_weak_key)
+ ; /* Detection has been disabled. */
+ else if (is_weak_key (key) || is_weak_key (key+8) || is_weak_key (key+16))
+ {
+ _gcry_burn_stack (64);
+ return GPG_ERR_WEAK_KEY;
+ }
+ _gcry_burn_stack (64);
+
+ return GPG_ERR_NO_ERROR;
+}
+
+
+static gcry_err_code_t
+do_tripledes_set_extra_info (void *context, int what,
+ const void *buffer, size_t buflen)
+{
+ struct _tripledes_ctx *ctx = (struct _tripledes_ctx *)context;
+ gpg_err_code_t ec = 0;
+
+ (void)buffer;
+ (void)buflen;
+
+ switch (what)
+ {
+ case CIPHER_INFO_NO_WEAK_KEY:
+ ctx->flags.no_weak_key = 1;
+ break;
+
+ default:
+ ec = GPG_ERR_INV_OP;
+ break;
+ }
+ return ec;
+}
+
+
+static unsigned int
+do_tripledes_encrypt( void *context, byte *outbuf, const byte *inbuf )
+{
+ struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context;
+
+ tripledes_ecb_encrypt ( ctx, inbuf, outbuf );
+ return /*burn_stack*/ TRIPLEDES_ECB_BURN_STACK;
+}
+
+static unsigned int
+do_tripledes_decrypt( void *context, byte *outbuf, const byte *inbuf )
+{
+ struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context;
+ tripledes_ecb_decrypt ( ctx, inbuf, outbuf );
+ return /*burn_stack*/ TRIPLEDES_ECB_BURN_STACK;
+}
+
+static gcry_err_code_t
+do_des_setkey (void *context, const byte *key, unsigned keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ struct _des_ctx *ctx = (struct _des_ctx *) context;
+
+ (void)bulk_ops;
+
+ if (keylen != 8)
+ return GPG_ERR_INV_KEYLEN;
+
+ des_setkey (ctx, key);
+
+ if (is_weak_key (key)) {
+ _gcry_burn_stack (64);
+ return GPG_ERR_WEAK_KEY;
+ }
+ _gcry_burn_stack (64);
+
+ return GPG_ERR_NO_ERROR;
+}
+
+
+static unsigned int
+do_des_encrypt( void *context, byte *outbuf, const byte *inbuf )
+{
+ struct _des_ctx *ctx = (struct _des_ctx *) context;
+
+ des_ecb_encrypt ( ctx, inbuf, outbuf );
+ return /*burn_stack*/ (32);
+}
+
+static unsigned int
+do_des_decrypt( void *context, byte *outbuf, const byte *inbuf )
+{
+ struct _des_ctx *ctx = (struct _des_ctx *) context;
+
+ des_ecb_decrypt ( ctx, inbuf, outbuf );
+ return /*burn_stack*/ (32);
+}
+
+
+
+
+/*
+ Self-test section.
+ */
+
+
+/* Selftest for TripleDES. */
+static gpg_err_code_t
+selftest_fips (int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+
+ (void)extended; /* No extended tests available. */
+
+ what = "low-level";
+ errtxt = selftest ();
+ if (errtxt)
+ goto failed;
+
+ /* The low-level self-tests are quite extensive and thus we can do
+ without high level tests. This is also justified because we have
+ no custom block code implementation for 3des but always use the
+ standard high level block code. */
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("cipher", GCRY_CIPHER_3DES, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+
+/* Run a full self-test for ALGO and return 0 on success. */
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+ gpg_err_code_t ec;
+
+ switch (algo)
+ {
+ case GCRY_CIPHER_3DES:
+ ec = selftest_fips (extended, report);
+ break;
+ default:
+ ec = GPG_ERR_CIPHER_ALGO;
+ break;
+
+ }
+ return ec;
+}
+
+
+
+gcry_cipher_spec_t _gcry_cipher_spec_des =
+ {
+ GCRY_CIPHER_DES, {0, 0},
+ "DES", NULL, NULL, 8, 64, sizeof (struct _des_ctx),
+ do_des_setkey, do_des_encrypt, do_des_decrypt
+ };
+
+static gcry_cipher_oid_spec_t oids_tripledes[] =
+ {
+ { "1.2.840.113549.3.7", GCRY_CIPHER_MODE_CBC },
+ /* Teletrust specific OID for 3DES. */
+ { "1.3.36.3.1.3.2.1", GCRY_CIPHER_MODE_CBC },
+ /* pbeWithSHAAnd3_KeyTripleDES_CBC */
+ { "1.2.840.113549.1.12.1.3", GCRY_CIPHER_MODE_CBC },
+ { NULL }
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_tripledes =
+ {
+ GCRY_CIPHER_3DES, {0, 1},
+ "3DES", NULL, oids_tripledes, 8, 192, sizeof (struct _tripledes_ctx),
+ do_tripledes_setkey, do_tripledes_encrypt, do_tripledes_decrypt,
+ NULL, NULL,
+ run_selftests,
+ do_tripledes_set_extra_info
+ };
diff --git a/comm/third_party/libgcrypt/cipher/dsa-common.c b/comm/third_party/libgcrypt/cipher/dsa-common.c
new file mode 100644
index 0000000000..fe49248dd6
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/dsa-common.c
@@ -0,0 +1,418 @@
+/* dsa-common.c - Common code for DSA
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+/*
+ * Modify K, by making it large enough, so that the variation in
+ * computation time stays small.
+ *
+ * Originally, (EC)DSA computation requires k where 0 < k < q.  Here,
+ * we add q (the order) to keep k in the range q < k < 2*q (or, by
+ * adding q once more, in the range 2*q < k < 3*q), so that the timing
+ * difference of the EC multiply (or exponentiation) operation stays
+ * small.  The result of the (EC)DSA computation is the same.
+ */
+void
+_gcry_dsa_modify_k (gcry_mpi_t k, gcry_mpi_t q, int qbits)
+{
+ gcry_mpi_t k1 = mpi_new (qbits+2);
+
+ mpi_resize (k, (qbits+2+BITS_PER_MPI_LIMB-1) / BITS_PER_MPI_LIMB);
+ k->nlimbs = k->alloced;
+ mpi_add (k, k, q);
+ mpi_add (k1, k, q);
+ mpi_set_cond (k, k1, !mpi_test_bit (k, qbits));
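+  /* After the conditional second addition, bit 'qbits' of k is always
+     set, so the value used in the subsequent multiply/exponentiation
+     has a fixed bit length of qbits+1 independent of the original k. */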
+
+ mpi_free (k1);
+}
+
+/*
+ * Generate a random secret exponent K less than Q.
+ * Note that ECDSA uses this code also to generate D.
+ */
+gcry_mpi_t
+_gcry_dsa_gen_k (gcry_mpi_t q, int security_level)
+{
+ gcry_mpi_t k = mpi_alloc_secure (mpi_get_nlimbs (q));
+ unsigned int nbits = mpi_get_nbits (q);
+ unsigned int nbytes = (nbits+7)/8;
+ char *rndbuf = NULL;
+
+ /* To learn why we don't use mpi_mod to get the requested bit size,
+ read the paper: "The Insecurity of the Digital Signature
+ Algorithm with Partially Known Nonces" by Nguyen and Shparlinski.
+ Journal of Cryptology, New York. Vol 15, nr 3 (2003) */
+
+ if (DBG_CIPHER)
+ log_debug ("choosing a random k of %u bits at seclevel %d\n",
+ nbits, security_level);
+ for (;;)
+ {
+ if ( !rndbuf || nbits < 32 )
+ {
+ xfree (rndbuf);
+ rndbuf = _gcry_random_bytes_secure (nbytes, security_level);
+ }
+ else
+ { /* Change only some of the higher bits. We could improve
+ this by directly requesting more memory at the first call
+ to get_random_bytes() and use these extra bytes here.
+ However the required management code is more complex and
+ thus we better use this simple method. */
+ char *pp = _gcry_random_bytes_secure (4, security_level);
+ memcpy (rndbuf, pp, 4);
+ xfree (pp);
+ }
+ _gcry_mpi_set_buffer (k, rndbuf, nbytes, 0);
+
+ /* Make sure we have the requested number of bits. This code
+ looks a bit funny but it is easy to understand if you
+ consider that mpi_set_highbit clears all higher bits. We
+ don't have a clear_highbit, thus we first set the high bit
+ and then clear it again. */
+ if (mpi_test_bit (k, nbits-1))
+ mpi_set_highbit (k, nbits-1);
+ else
+ {
+ mpi_set_highbit (k, nbits-1);
+ mpi_clear_bit (k, nbits-1);
+ }
+
+ if (!(mpi_cmp (k, q) < 0)) /* check: k < q */
+ {
+ if (DBG_CIPHER)
+ log_debug ("\tk too large - again\n");
+ continue; /* no */
+ }
+ if (!(mpi_cmp_ui (k, 0) > 0)) /* check: k > 0 */
+ {
+ if (DBG_CIPHER)
+ log_debug ("\tk is zero - again\n");
+ continue; /* no */
+ }
+ break; /* okay */
+ }
+ xfree (rndbuf);
+
+ return k;
+}
+
+
+/* Turn VALUE into an octet string and store it in an allocated buffer
+ at R_FRAME. If the resulting octet string is shorter than NBYTES
+ the result will be left padded with zeroes. If VALUE does not fit
+ into NBYTES an error code is returned. */
+static gpg_err_code_t
+int2octets (unsigned char **r_frame, gcry_mpi_t value, size_t nbytes)
+{
+ gpg_err_code_t rc;
+ size_t nframe, noff, n;
+ unsigned char *frame;
+
+ rc = _gcry_mpi_print (GCRYMPI_FMT_USG, NULL, 0, &nframe, value);
+ if (rc)
+ return rc;
+ if (nframe > nbytes)
+ return GPG_ERR_TOO_LARGE; /* Value too long to fit into NBYTES. */
+
+ noff = (nframe < nbytes)? nbytes - nframe : 0;
+ n = nframe + noff;
+ frame = mpi_is_secure (value)? xtrymalloc_secure (n) : xtrymalloc (n);
+ if (!frame)
+ return gpg_err_code_from_syserror ();
+ if (noff)
+ memset (frame, 0, noff);
+ nframe += noff;
+ rc = _gcry_mpi_print (GCRYMPI_FMT_USG, frame+noff, nframe-noff, NULL, value);
+ if (rc)
+ {
+ xfree (frame);
+ return rc;
+ }
+
+ *r_frame = frame;
+ return 0;
+}
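+
+/* For example, int2octets of the value 0x01F2 with NBYTES = 4 yields
+   the octet string 00 00 01 F2 (left padded with zeroes), while
+   NBYTES = 1 would fail with GPG_ERR_TOO_LARGE.  */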
+
+
+/* Convert the bit string BITS of length NBITS into an octet string
+ with a length of (QBITS+7)/8 bytes. On success store the result at
+ R_FRAME. */
+static gpg_err_code_t
+bits2octets (unsigned char **r_frame,
+ const void *bits, unsigned int nbits,
+ gcry_mpi_t q, unsigned int qbits)
+{
+ gpg_err_code_t rc;
+ gcry_mpi_t z1;
+
+ /* z1 = bits2int (b) */
+ rc = _gcry_mpi_scan (&z1, GCRYMPI_FMT_USG, bits, (nbits+7)/8, NULL);
+ if (rc)
+ return rc;
+ if (nbits > qbits)
+ mpi_rshift (z1, z1, nbits - qbits);
+
+  /* z2 = z1 mod q */
+ if (mpi_cmp (z1, q) >= 0)
+ mpi_sub (z1, z1, q);
+
+ /* Convert to an octet string. */
+ rc = int2octets (r_frame, z1, (qbits+7)/8);
+
+ mpi_free (z1);
+ return rc;
+}
+
+
+/*
+ * Generate a deterministic secret exponent K less than DSA_Q. H1 is
+ * the to be signed digest with a length of HLEN bytes. HALGO is the
+ * algorithm used to create the hash. On success the value for K is
+ * stored at R_K.
+ */
+gpg_err_code_t
+_gcry_dsa_gen_rfc6979_k (gcry_mpi_t *r_k,
+ gcry_mpi_t dsa_q, gcry_mpi_t dsa_x,
+ const unsigned char *h1, unsigned int hlen,
+ int halgo, unsigned int extraloops)
+{
+ gpg_err_code_t rc;
+ unsigned char *V = NULL;
+ unsigned char *K = NULL;
+ unsigned char *x_buf = NULL;
+ unsigned char *h1_buf = NULL;
+ gcry_md_hd_t hd = NULL;
+ unsigned char *t = NULL;
+ gcry_mpi_t k = NULL;
+ unsigned int tbits, qbits;
+ int i;
+
+ qbits = mpi_get_nbits (dsa_q);
+
+ if (!qbits || !h1 || !hlen)
+ return GPG_ERR_EINVAL;
+
+ if (_gcry_md_get_algo_dlen (halgo) != hlen)
+ return GPG_ERR_DIGEST_ALGO;
+
+ /* Step b: V = 0x01 0x01 0x01 ... 0x01 */
+ V = xtrymalloc (hlen);
+ if (!V)
+ {
+ rc = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+ for (i=0; i < hlen; i++)
+ V[i] = 1;
+
+ /* Step c: K = 0x00 0x00 0x00 ... 0x00 */
+ K = xtrycalloc (1, hlen);
+ if (!K)
+ {
+ rc = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+
+ rc = int2octets (&x_buf, dsa_x, (qbits+7)/8);
+ if (rc)
+ goto leave;
+
+ rc = bits2octets (&h1_buf, h1, hlen*8, dsa_q, qbits);
+ if (rc)
+ goto leave;
+
+ /* Create a handle to compute the HMACs. */
+ rc = _gcry_md_open (&hd, halgo, (GCRY_MD_FLAG_SECURE | GCRY_MD_FLAG_HMAC));
+ if (rc)
+ goto leave;
+
+ /* Step d: K = HMAC_K(V || 0x00 || int2octets(x) || bits2octets(h1) */
+ rc = _gcry_md_setkey (hd, K, hlen);
+ if (rc)
+ goto leave;
+ _gcry_md_write (hd, V, hlen);
+ _gcry_md_write (hd, "", 1);
+ _gcry_md_write (hd, x_buf, (qbits+7)/8);
+ _gcry_md_write (hd, h1_buf, (qbits+7)/8);
+ memcpy (K, _gcry_md_read (hd, 0), hlen);
+
+ /* Step e: V = HMAC_K(V) */
+ rc = _gcry_md_setkey (hd, K, hlen);
+ if (rc)
+ goto leave;
+ _gcry_md_write (hd, V, hlen);
+ memcpy (V, _gcry_md_read (hd, 0), hlen);
+
+ /* Step f: K = HMAC_K(V || 0x01 || int2octets(x) || bits2octets(h1) */
+ rc = _gcry_md_setkey (hd, K, hlen);
+ if (rc)
+ goto leave;
+ _gcry_md_write (hd, V, hlen);
+ _gcry_md_write (hd, "\x01", 1);
+ _gcry_md_write (hd, x_buf, (qbits+7)/8);
+ _gcry_md_write (hd, h1_buf, (qbits+7)/8);
+ memcpy (K, _gcry_md_read (hd, 0), hlen);
+
+ /* Step g: V = HMAC_K(V) */
+ rc = _gcry_md_setkey (hd, K, hlen);
+ if (rc)
+ goto leave;
+ _gcry_md_write (hd, V, hlen);
+ memcpy (V, _gcry_md_read (hd, 0), hlen);
+
+ /* Step h. */
+ t = xtrymalloc_secure ((qbits+7)/8+hlen);
+ if (!t)
+ {
+ rc = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+
+ again:
+ for (tbits = 0; tbits < qbits;)
+ {
+ /* V = HMAC_K(V) */
+ rc = _gcry_md_setkey (hd, K, hlen);
+ if (rc)
+ goto leave;
+ _gcry_md_write (hd, V, hlen);
+ memcpy (V, _gcry_md_read (hd, 0), hlen);
+
+ /* T = T || V */
+ memcpy (t+(tbits+7)/8, V, hlen);
+ tbits += 8*hlen;
+ }
+
+ /* k = bits2int (T) */
+ mpi_free (k);
+ k = NULL;
+ rc = _gcry_mpi_scan (&k, GCRYMPI_FMT_USG, t, (tbits+7)/8, NULL);
+ if (rc)
+ goto leave;
+ if (tbits > qbits)
+ mpi_rshift (k, k, tbits - qbits);
+
+ /* Check: k < q and k > 1 */
+ if (!(mpi_cmp (k, dsa_q) < 0 && mpi_cmp_ui (k, 0) > 0))
+ {
+ /* K = HMAC_K(V || 0x00) */
+ rc = _gcry_md_setkey (hd, K, hlen);
+ if (rc)
+ goto leave;
+ _gcry_md_write (hd, V, hlen);
+ _gcry_md_write (hd, "", 1);
+ memcpy (K, _gcry_md_read (hd, 0), hlen);
+
+ /* V = HMAC_K(V) */
+ rc = _gcry_md_setkey (hd, K, hlen);
+ if (rc)
+ goto leave;
+ _gcry_md_write (hd, V, hlen);
+ memcpy (V, _gcry_md_read (hd, 0), hlen);
+
+ goto again;
+ }
+
+ /* The caller may have requested that we introduce some extra loops.
+ This is for example useful if the caller wants another value for
+ K because the last returned one yielded an R of 0. Because this
+ is very unlikely we implement it in a straightforward way. */
+ if (extraloops)
+ {
+ extraloops--;
+
+ /* K = HMAC_K(V || 0x00) */
+ rc = _gcry_md_setkey (hd, K, hlen);
+ if (rc)
+ goto leave;
+ _gcry_md_write (hd, V, hlen);
+ _gcry_md_write (hd, "", 1);
+ memcpy (K, _gcry_md_read (hd, 0), hlen);
+
+ /* V = HMAC_K(V) */
+ rc = _gcry_md_setkey (hd, K, hlen);
+ if (rc)
+ goto leave;
+ _gcry_md_write (hd, V, hlen);
+ memcpy (V, _gcry_md_read (hd, 0), hlen);
+
+ goto again;
+ }
+
+ /* log_mpidump (" k", k); */
+
+ leave:
+ xfree (t);
+ _gcry_md_close (hd);
+ xfree (h1_buf);
+ xfree (x_buf);
+ xfree (K);
+ xfree (V);
+
+ if (rc)
+ mpi_free (k);
+ else
+ *r_k = k;
+ return rc;
+}
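+
+/* A minimal usage sketch (variable names are illustrative only); this
+   mirrors the call made from the DSA sign() routine below:
+
+     gcry_mpi_t k;
+     gpg_err_code_t rc;
+
+     rc = _gcry_dsa_gen_rfc6979_k (&k, skey->q, skey->x,
+                                   digest, digestlen, GCRY_MD_SHA256, 0);
+     if (!rc)
+       {
+         ... use k as the per-signature nonce, then mpi_free (k); ...
+       }
+ */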
+
+/*
+ * Truncate an opaque hash value to QBITS for DSA.
+ * Non-opaque input is not truncated, in the hope that the caller
+ * knows what is passed; it is not possible to correctly truncate
+ * non-opaque inputs.  For example, a 256-bit opaque SHA-256 value
+ * is right-shifted by 96 bits when QBITS is 160.
+ */
+gpg_err_code_t
+_gcry_dsa_normalize_hash (gcry_mpi_t input,
+ gcry_mpi_t *out,
+ unsigned int qbits)
+{
+ gpg_err_code_t rc = 0;
+ const void *abuf;
+ unsigned int abits;
+ gcry_mpi_t hash;
+
+ if (mpi_is_opaque (input))
+ {
+ abuf = mpi_get_opaque (input, &abits);
+ rc = _gcry_mpi_scan (&hash, GCRYMPI_FMT_USG, abuf, (abits+7)/8, NULL);
+ if (rc)
+ return rc;
+ if (abits > qbits)
+ mpi_rshift (hash, hash, abits - qbits);
+ }
+ else
+ hash = input;
+
+ *out = hash;
+
+ return rc;
+}
diff --git a/comm/third_party/libgcrypt/cipher/dsa.c b/comm/third_party/libgcrypt/cipher/dsa.c
new file mode 100644
index 0000000000..d793b9aaf2
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/dsa.c
@@ -0,0 +1,1394 @@
+/* dsa.c - DSA signature algorithm
+ * Copyright (C) 1998, 2000, 2001, 2002, 2003,
+ * 2006, 2008 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+typedef struct
+{
+ gcry_mpi_t p; /* prime */
+ gcry_mpi_t q; /* group order */
+ gcry_mpi_t g; /* group generator */
+ gcry_mpi_t y; /* g^x mod p */
+} DSA_public_key;
+
+
+typedef struct
+{
+ gcry_mpi_t p; /* prime */
+ gcry_mpi_t q; /* group order */
+ gcry_mpi_t g; /* group generator */
+ gcry_mpi_t y; /* g^x mod p */
+ gcry_mpi_t x; /* secret exponent */
+} DSA_secret_key;
+
+
+/* A structure used to hold domain parameters. */
+typedef struct
+{
+ gcry_mpi_t p; /* prime */
+ gcry_mpi_t q; /* group order */
+ gcry_mpi_t g; /* group generator */
+} dsa_domain_t;
+
+
+static const char *dsa_names[] =
+ {
+ "dsa",
+ "openpgp-dsa",
+ NULL,
+ };
+
+
+/* A sample 1024 bit DSA key used for the selftests. Not anymore
+ * used, kept only for reference. */
+#if 0
+static const char sample_secret_key_1024[] =
+"(private-key"
+" (dsa"
+" (p #00AD7C0025BA1A15F775F3F2D673718391D00456978D347B33D7B49E7F32EDAB"
+" 96273899DD8B2BB46CD6ECA263FAF04A28903503D59062A8865D2AE8ADFB5191"
+" CF36FFB562D0E2F5809801A1F675DAE59698A9E01EFE8D7DCFCA084F4C6F5A44"
+" 44D499A06FFAEA5E8EF5E01F2FD20A7B7EF3F6968AFBA1FB8D91F1559D52D8777B#)"
+" (q #00EB7B5751D25EBBB7BD59D920315FD840E19AEBF9#)"
+" (g #1574363387FDFD1DDF38F4FBE135BB20C7EE4772FB94C337AF86EA8E49666503"
+" AE04B6BE81A2F8DD095311E0217ACA698A11E6C5D33CCDAE71498ED35D13991E"
+" B02F09AB40BD8F4C5ED8C75DA779D0AE104BC34C960B002377068AB4B5A1F984"
+" 3FBA91F537F1B7CAC4D8DD6D89B0D863AF7025D549F9C765D2FC07EE208F8D15#)"
+" (y #64B11EF8871BE4AB572AA810D5D3CA11A6CDBC637A8014602C72960DB135BF46"
+" A1816A724C34F87330FC9E187C5D66897A04535CC2AC9164A7150ABFA8179827"
+" 6E45831AB811EEE848EBB24D9F5F2883B6E5DDC4C659DEF944DCFD80BF4D0A20"
+" 42CAA7DC289F0C5A9D155F02D3D551DB741A81695B74D4C8F477F9C7838EB0FB#)"
+" (x #11D54E4ADBD3034160F2CED4B7CD292A4EBF3EC0#)))";
+/* A sample 1024 bit DSA key used for the selftests (public only). */
+static const char sample_public_key_1024[] =
+"(public-key"
+" (dsa"
+" (p #00AD7C0025BA1A15F775F3F2D673718391D00456978D347B33D7B49E7F32EDAB"
+" 96273899DD8B2BB46CD6ECA263FAF04A28903503D59062A8865D2AE8ADFB5191"
+" CF36FFB562D0E2F5809801A1F675DAE59698A9E01EFE8D7DCFCA084F4C6F5A44"
+" 44D499A06FFAEA5E8EF5E01F2FD20A7B7EF3F6968AFBA1FB8D91F1559D52D8777B#)"
+" (q #00EB7B5751D25EBBB7BD59D920315FD840E19AEBF9#)"
+" (g #1574363387FDFD1DDF38F4FBE135BB20C7EE4772FB94C337AF86EA8E49666503"
+" AE04B6BE81A2F8DD095311E0217ACA698A11E6C5D33CCDAE71498ED35D13991E"
+" B02F09AB40BD8F4C5ED8C75DA779D0AE104BC34C960B002377068AB4B5A1F984"
+" 3FBA91F537F1B7CAC4D8DD6D89B0D863AF7025D549F9C765D2FC07EE208F8D15#)"
+" (y #64B11EF8871BE4AB572AA810D5D3CA11A6CDBC637A8014602C72960DB135BF46"
+" A1816A724C34F87330FC9E187C5D66897A04535CC2AC9164A7150ABFA8179827"
+" 6E45831AB811EEE848EBB24D9F5F2883B6E5DDC4C659DEF944DCFD80BF4D0A20"
+" 42CAA7DC289F0C5A9D155F02D3D551DB741A81695B74D4C8F477F9C7838EB0FB#)))";
+#endif /*0*/
+
+/* 2048 DSA key from RFC 6979 A.2.2 */
+static const char sample_public_key_2048[] =
+"(public-key"
+" (dsa"
+" (p #9DB6FB5951B66BB6FE1E140F1D2CE5502374161FD6538DF1648218642F0B5C48C8F7A41AADFA187324B87674FA1822B00F1ECF8136943D7C55757264E5A1A44FFE012E9936E00C1D3E9310B01C7D179805D3058B2A9F4BB6F9716BFE6117C6B5B3CC4D9BE341104AD4A80AD6C94E005F4B993E14F091EB51743BF33050C38DE235567E1B34C3D6A5C0CEAA1A0F368213C3D19843D0B4B09DCB9FC72D39C8DE41F1BF14D4BB4563CA28371621CAD3324B6A2D392145BEBFAC748805236F5CA2FE92B871CD8F9C36D3292B5509CA8CAA77A2ADFC7BFD77DDA6F71125A7456FEA153E433256A2261C6A06ED3693797E7995FAD5AABBCFBE3EDA2741E375404AE25B#)"
+" (q #F2C3119374CE76C9356990B465374A17F23F9ED35089BD969F61C6DDE9998C1F#)"
+" (g #5C7FF6B06F8F143FE8288433493E4769C4D988ACE5BE25A0E24809670716C613D7B0CEE6932F8FAA7C44D2CB24523DA53FBE4F6EC3595892D1AA58C4328A06C46A15662E7EAA703A1DECF8BBB2D05DBE2EB956C142A338661D10461C0D135472085057F3494309FFA73C611F78B32ADBB5740C361C9F35BE90997DB2014E2EF5AA61782F52ABEB8BD6432C4DD097BC5423B285DAFB60DC364E8161F4A2A35ACA3A10B1C4D203CC76A470A33AFDCBDD92959859ABD8B56E1725252D78EAC66E71BA9AE3F1DD2487199874393CD4D832186800654760E1E34C09E4D155179F9EC0DC4473F996BDCE6EED1CABED8B6F116F7AD9CF505DF0F998E34AB27514B0FFE7#)"
+" (y #667098C654426C78D7F8201EAC6C203EF030D43605032C2F1FA937E5237DBD949F34A0A2564FE126DC8B715C5141802CE0979C8246463C40E6B6BDAA2513FA611728716C2E4FD53BC95B89E69949D96512E873B9C8F8DFD499CC312882561ADECB31F658E934C0C197F2C4D96B05CBAD67381E7B768891E4DA3843D24D94CDFB5126E9B8BF21E8358EE0E0A30EF13FD6A664C0DCE3731F7FB49A4845A4FD8254687972A2D382599C9BAC4E0ED7998193078913032558134976410B89D2C171D123AC35FD977219597AA7D15C1A9A428E59194F75C721EBCBCFAE44696A499AFA74E04299F132026601638CB87AB79190D4A0986315DA8EEC6561C938996BEADF#)))";
+
+static const char sample_secret_key_2048[] =
+"(private-key"
+" (dsa"
+" (p #9DB6FB5951B66BB6FE1E140F1D2CE5502374161FD6538DF1648218642F0B5C48C8F7A41AADFA187324B87674FA1822B00F1ECF8136943D7C55757264E5A1A44FFE012E9936E00C1D3E9310B01C7D179805D3058B2A9F4BB6F9716BFE6117C6B5B3CC4D9BE341104AD4A80AD6C94E005F4B993E14F091EB51743BF33050C38DE235567E1B34C3D6A5C0CEAA1A0F368213C3D19843D0B4B09DCB9FC72D39C8DE41F1BF14D4BB4563CA28371621CAD3324B6A2D392145BEBFAC748805236F5CA2FE92B871CD8F9C36D3292B5509CA8CAA77A2ADFC7BFD77DDA6F71125A7456FEA153E433256A2261C6A06ED3693797E7995FAD5AABBCFBE3EDA2741E375404AE25B#)"
+" (q #F2C3119374CE76C9356990B465374A17F23F9ED35089BD969F61C6DDE9998C1F#)"
+" (g #5C7FF6B06F8F143FE8288433493E4769C4D988ACE5BE25A0E24809670716C613D7B0CEE6932F8FAA7C44D2CB24523DA53FBE4F6EC3595892D1AA58C4328A06C46A15662E7EAA703A1DECF8BBB2D05DBE2EB956C142A338661D10461C0D135472085057F3494309FFA73C611F78B32ADBB5740C361C9F35BE90997DB2014E2EF5AA61782F52ABEB8BD6432C4DD097BC5423B285DAFB60DC364E8161F4A2A35ACA3A10B1C4D203CC76A470A33AFDCBDD92959859ABD8B56E1725252D78EAC66E71BA9AE3F1DD2487199874393CD4D832186800654760E1E34C09E4D155179F9EC0DC4473F996BDCE6EED1CABED8B6F116F7AD9CF505DF0F998E34AB27514B0FFE7#)"
+" (y #667098C654426C78D7F8201EAC6C203EF030D43605032C2F1FA937E5237DBD949F34A0A2564FE126DC8B715C5141802CE0979C8246463C40E6B6BDAA2513FA611728716C2E4FD53BC95B89E69949D96512E873B9C8F8DFD499CC312882561ADECB31F658E934C0C197F2C4D96B05CBAD67381E7B768891E4DA3843D24D94CDFB5126E9B8BF21E8358EE0E0A30EF13FD6A664C0DCE3731F7FB49A4845A4FD8254687972A2D382599C9BAC4E0ED7998193078913032558134976410B89D2C171D123AC35FD977219597AA7D15C1A9A428E59194F75C721EBCBCFAE44696A499AFA74E04299F132026601638CB87AB79190D4A0986315DA8EEC6561C938996BEADF#)"
+" (x #69C7548C21D0DFEA6B9A51C9EAD4E27C33D3B3F180316E5BCAB92C933F0E4DBC#)))";
+
+
+
+static int test_keys (DSA_secret_key *sk, unsigned int qbits);
+static int check_secret_key (DSA_secret_key *sk);
+static gpg_err_code_t generate (DSA_secret_key *sk,
+ unsigned int nbits,
+ unsigned int qbits,
+ int transient_key,
+ dsa_domain_t *domain,
+ gcry_mpi_t **ret_factors);
+static gpg_err_code_t sign (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input,
+ DSA_secret_key *skey, int flags, int hashalgo);
+static gpg_err_code_t verify (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input,
+ DSA_public_key *pkey);
+static unsigned int dsa_get_nbits (gcry_sexp_t parms);
+
+
+static void (*progress_cb) (void *,const char *, int, int, int );
+static void *progress_cb_data;
+
+
+void
+_gcry_register_pk_dsa_progress (void (*cb) (void *, const char *,
+ int, int, int),
+ void *cb_data)
+{
+ progress_cb = cb;
+ progress_cb_data = cb_data;
+}
+
+
+static void
+progress (int c)
+{
+ if (progress_cb)
+ progress_cb (progress_cb_data, "pk_dsa", c, 0, 0);
+}
+
+
+/* Check that a freshly generated key actually works. Returns 0 on success. */
+static int
+test_keys (DSA_secret_key *sk, unsigned int qbits)
+{
+ int result = -1; /* Default to failure. */
+ DSA_public_key pk;
+ gcry_mpi_t data = mpi_new (qbits);
+ gcry_mpi_t sig_a = mpi_new (qbits);
+ gcry_mpi_t sig_b = mpi_new (qbits);
+
+ /* Put the relevant parameters into a public key structure. */
+ pk.p = sk->p;
+ pk.q = sk->q;
+ pk.g = sk->g;
+ pk.y = sk->y;
+
+ /* Create a random plaintext. */
+ _gcry_mpi_randomize (data, qbits, GCRY_WEAK_RANDOM);
+
+ /* Sign DATA using the secret key. */
+ sign (sig_a, sig_b, data, sk, 0, 0);
+
+ /* Verify the signature using the public key. */
+ if ( verify (sig_a, sig_b, data, &pk) )
+ goto leave; /* Signature does not match. */
+
+  /* Modify the data and check that the verification now fails.  */
+ mpi_add_ui (data, data, 1);
+ if ( !verify (sig_a, sig_b, data, &pk) )
+ goto leave; /* Signature matches but should not. */
+
+ result = 0; /* The test succeeded. */
+
+ leave:
+ _gcry_mpi_release (sig_b);
+ _gcry_mpi_release (sig_a);
+ _gcry_mpi_release (data);
+ return result;
+}
+
+
+
+/*
+ Generate a DSA key pair with a key of size NBITS. If transient_key
+ is true the key is generated using the standard RNG and not the
+ very secure one.
+
+ Returns: 2 structures filled with all needed values
+ and an array with the n-1 factors of (p-1)
+ */
+static gpg_err_code_t
+generate (DSA_secret_key *sk, unsigned int nbits, unsigned int qbits,
+ int transient_key, dsa_domain_t *domain, gcry_mpi_t **ret_factors )
+{
+ gpg_err_code_t rc;
+ gcry_mpi_t p; /* the prime */
+ gcry_mpi_t q; /* the 160 bit prime factor */
+ gcry_mpi_t g; /* the generator */
+ gcry_mpi_t y; /* g^x mod p */
+ gcry_mpi_t x; /* the secret exponent */
+ gcry_mpi_t h, e; /* helper */
+ unsigned char *rndbuf;
+ gcry_random_level_t random_level;
+
+ if (qbits)
+ ; /* Caller supplied qbits. Use this value. */
+ else if ( nbits >= 512 && nbits <= 1024 )
+ qbits = 160;
+ else if ( nbits == 2048 )
+ qbits = 224;
+ else if ( nbits == 3072 )
+ qbits = 256;
+ else if ( nbits == 7680 )
+ qbits = 384;
+ else if ( nbits == 15360 )
+ qbits = 512;
+ else
+ return GPG_ERR_INV_VALUE;
+
+ if (qbits < 160 || qbits > 512 || (qbits%8) )
+ return GPG_ERR_INV_VALUE;
+ if (nbits < 2*qbits || nbits > 15360)
+ return GPG_ERR_INV_VALUE;
+
+ if (fips_mode ())
+ {
+ if (nbits < 1024)
+ return GPG_ERR_INV_VALUE;
+ if (transient_key)
+ return GPG_ERR_INV_VALUE;
+ }
+
+ if (domain->p && domain->q && domain->g)
+ {
+ /* Domain parameters are given; use them. */
+ p = mpi_copy (domain->p);
+ q = mpi_copy (domain->q);
+ g = mpi_copy (domain->g);
+ gcry_assert (mpi_get_nbits (p) == nbits);
+ gcry_assert (mpi_get_nbits (q) == qbits);
+ h = mpi_alloc (0);
+ e = NULL;
+ }
+ else
+ {
+ /* Generate new domain parameters. */
+ rc = _gcry_generate_elg_prime (1, nbits, qbits, NULL, &p, ret_factors);
+ if (rc)
+ return rc;
+
+ /* Get q out of factors. */
+ q = mpi_copy ((*ret_factors)[0]);
+ gcry_assert (mpi_get_nbits (q) == qbits);
+
+ /* Find a generator g (h and e are helpers).
+ e = (p-1)/q */
+ e = mpi_alloc (mpi_get_nlimbs (p));
+ mpi_sub_ui (e, p, 1);
+ mpi_fdiv_q (e, e, q);
+ g = mpi_alloc (mpi_get_nlimbs (p));
+ h = mpi_alloc_set_ui (1); /* (We start with 2.) */
+ do
+ {
+ mpi_add_ui (h, h, 1);
+ /* g = h^e mod p */
+ mpi_powm (g, h, e, p);
+ }
+ while (!mpi_cmp_ui (g, 1)); /* Continue until g != 1. */
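+      /* Since e = (p-1)/q and q is prime, any g = h^e mod p with
+         g != 1 has order exactly q, i.e. it generates the subgroup
+         of order q.  */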
+ }
+
+ /* Select a random number X with the property:
+ * 0 < x < q-1
+ *
+ * FIXME: Why do we use the requirement x < q-1 ? It should be
+   * sufficient to test for x < q.  FIPS-186-3 checks x < q-1 but it
+ * does not check for 0 < x because it makes sure that Q is unsigned
+ * and finally adds one to the result so that 0 will never be
+ * returned. We should replace the code below with _gcry_dsa_gen_k.
+ *
+ * This must be a very good random number because this is the secret
+ * part. The random quality depends on the transient_key flag. */
+ random_level = transient_key ? GCRY_STRONG_RANDOM : GCRY_VERY_STRONG_RANDOM;
+ if (DBG_CIPHER)
+ log_debug("choosing a random x%s\n", transient_key? " (transient-key)":"");
+ gcry_assert( qbits >= 160 );
+ x = mpi_alloc_secure( mpi_get_nlimbs(q) );
+ mpi_sub_ui( h, q, 1 ); /* put q-1 into h */
+ rndbuf = NULL;
+ do
+ {
+ if( DBG_CIPHER )
+ progress('.');
+ if( !rndbuf )
+ rndbuf = _gcry_random_bytes_secure ((qbits+7)/8, random_level);
+ else
+ { /* Change only some of the higher bits (= 2 bytes)*/
+ char *r = _gcry_random_bytes_secure (2, random_level);
+ memcpy(rndbuf, r, 2 );
+ xfree(r);
+ }
+
+ _gcry_mpi_set_buffer( x, rndbuf, (qbits+7)/8, 0 );
+ mpi_clear_highbit( x, qbits+1 );
+ }
+ while ( !( mpi_cmp_ui( x, 0 )>0 && mpi_cmp( x, h )<0 ) );
+ xfree(rndbuf);
+ mpi_free( e );
+ mpi_free( h );
+
+ /* y = g^x mod p */
+ y = mpi_alloc( mpi_get_nlimbs(p) );
+ mpi_powm (y, g, x, p);
+
+ if( DBG_CIPHER )
+ {
+ progress('\n');
+ log_mpidump("dsa p", p );
+ log_mpidump("dsa q", q );
+ log_mpidump("dsa g", g );
+ log_mpidump("dsa y", y );
+ log_mpidump("dsa x", x );
+ }
+
+ /* Copy the stuff to the key structures. */
+ sk->p = p;
+ sk->q = q;
+ sk->g = g;
+ sk->y = y;
+ sk->x = x;
+
+ /* Now we can test our keys (this should never fail!). */
+ if ( test_keys (sk, qbits) )
+ {
+ _gcry_mpi_release (sk->p); sk->p = NULL;
+ _gcry_mpi_release (sk->q); sk->q = NULL;
+ _gcry_mpi_release (sk->g); sk->g = NULL;
+ _gcry_mpi_release (sk->y); sk->y = NULL;
+ _gcry_mpi_release (sk->x); sk->x = NULL;
+ fips_signal_error ("self-test after key generation failed");
+ return GPG_ERR_SELFTEST_FAILED;
+ }
+ return 0;
+}
+
+
+/* Generate a DSA key pair with a key of size NBITS using the
+ algorithm given in FIPS-186-3. If USE_FIPS186_2 is true,
+ FIPS-186-2 is used and thus the length is restricted to 1024/160.
+ If DERIVEPARMS is not NULL it may contain a seed value. If domain
+ parameters are specified in DOMAIN, DERIVEPARMS may not be given
+ and NBITS and QBITS must match the specified domain parameters. */
+static gpg_err_code_t
+generate_fips186 (DSA_secret_key *sk, unsigned int nbits, unsigned int qbits,
+ gcry_sexp_t deriveparms, int use_fips186_2,
+ dsa_domain_t *domain,
+ int *r_counter, void **r_seed, size_t *r_seedlen,
+ gcry_mpi_t *r_h)
+{
+ gpg_err_code_t ec;
+ struct {
+ gcry_sexp_t sexp;
+ const void *seed;
+ size_t seedlen;
+ } initial_seed = { NULL, NULL, 0 };
+ gcry_mpi_t prime_q = NULL;
+ gcry_mpi_t prime_p = NULL;
+ gcry_mpi_t value_g = NULL; /* The generator. */
+ gcry_mpi_t value_y = NULL; /* g^x mod p */
+ gcry_mpi_t value_x = NULL; /* The secret exponent. */
+ gcry_mpi_t value_h = NULL; /* Helper. */
+ gcry_mpi_t value_e = NULL; /* Helper. */
+ gcry_mpi_t value_c = NULL; /* helper for x */
+ gcry_mpi_t value_qm2 = NULL; /* q - 2 */
+
+ /* Preset return values. */
+ *r_counter = 0;
+ *r_seed = NULL;
+ *r_seedlen = 0;
+ *r_h = NULL;
+
+ /* Derive QBITS from NBITS if requested */
+ if (!qbits)
+ {
+ if (nbits == 1024)
+ qbits = 160;
+ else if (nbits == 2048)
+ qbits = 224;
+ else if (nbits == 3072)
+ qbits = 256;
+ }
+
+ /* Check that QBITS and NBITS match the standard. Note that FIPS
+ 186-3 uses N for QBITS and L for NBITS. */
+ if (nbits == 1024 && qbits == 160 && use_fips186_2)
+ ; /* Allowed in FIPS 186-2 mode. */
+ else if (nbits == 2048 && qbits == 224)
+ ;
+ else if (nbits == 2048 && qbits == 256)
+ ;
+ else if (nbits == 3072 && qbits == 256)
+ ;
+ else
+ return GPG_ERR_INV_VALUE;
+
+ if (domain->p && domain->q && domain->g)
+ {
+ /* Domain parameters are given; use them. */
+ prime_p = mpi_copy (domain->p);
+ prime_q = mpi_copy (domain->q);
+ value_g = mpi_copy (domain->g);
+ gcry_assert (mpi_get_nbits (prime_p) == nbits);
+ gcry_assert (mpi_get_nbits (prime_q) == qbits);
+ gcry_assert (!deriveparms);
+ ec = 0;
+ }
+ else
+ {
+ /* Generate new domain parameters. */
+
+ /* Get an initial seed value. */
+ if (deriveparms)
+ {
+ initial_seed.sexp = sexp_find_token (deriveparms, "seed", 0);
+ if (initial_seed.sexp)
+ initial_seed.seed = sexp_nth_data (initial_seed.sexp, 1,
+ &initial_seed.seedlen);
+ }
+
+ if (use_fips186_2)
+ ec = _gcry_generate_fips186_2_prime (nbits, qbits,
+ initial_seed.seed,
+ initial_seed.seedlen,
+ &prime_q, &prime_p,
+ r_counter,
+ r_seed, r_seedlen);
+ else
+ ec = _gcry_generate_fips186_3_prime (nbits, qbits,
+ initial_seed.seed,
+ initial_seed.seedlen,
+ &prime_q, &prime_p,
+ r_counter,
+ r_seed, r_seedlen, NULL);
+ sexp_release (initial_seed.sexp);
+ if (ec)
+ goto leave;
+
+ /* Find a generator g (h and e are helpers).
+ * e = (p-1)/q
+ */
+ value_e = mpi_alloc_like (prime_p);
+ mpi_sub_ui (value_e, prime_p, 1);
+ mpi_fdiv_q (value_e, value_e, prime_q );
+ value_g = mpi_alloc_like (prime_p);
+ value_h = mpi_alloc_set_ui (1);
+ do
+ {
+ mpi_add_ui (value_h, value_h, 1);
+ /* g = h^e mod p */
+ mpi_powm (value_g, value_h, value_e, prime_p);
+ }
+ while (!mpi_cmp_ui (value_g, 1)); /* Continue until g != 1. */
+ }
+
+ value_c = mpi_snew (qbits);
+ value_x = mpi_snew (qbits);
+ value_qm2 = mpi_snew (qbits);
+ mpi_sub_ui (value_qm2, prime_q, 2);
+
+ /* FIPS 186-4 B.1.2 steps 4-6 */
+ do
+ {
+ if( DBG_CIPHER )
+ progress('.');
+ _gcry_mpi_randomize (value_c, qbits, GCRY_VERY_STRONG_RANDOM);
+ mpi_clear_highbit (value_c, qbits+1);
+ }
+ while (!(mpi_cmp_ui (value_c, 0) > 0 && mpi_cmp (value_c, value_qm2) < 0));
+ /* while (mpi_cmp (value_c, value_qm2) > 0); */
+
+ /* x = c + 1 */
+ mpi_add_ui(value_x, value_c, 1);
+
+ /* y = g^x mod p */
+ value_y = mpi_alloc_like (prime_p);
+ mpi_powm (value_y, value_g, value_x, prime_p);
+
+ if (DBG_CIPHER)
+ {
+ progress('\n');
+ log_mpidump("dsa p", prime_p );
+ log_mpidump("dsa q", prime_q );
+ log_mpidump("dsa g", value_g );
+ log_mpidump("dsa y", value_y );
+ log_mpidump("dsa x", value_x );
+ log_mpidump("dsa h", value_h );
+ }
+
+ /* Copy the stuff to the key structures. */
+ sk->p = prime_p; prime_p = NULL;
+ sk->q = prime_q; prime_q = NULL;
+ sk->g = value_g; value_g = NULL;
+ sk->y = value_y; value_y = NULL;
+ sk->x = value_x; value_x = NULL;
+ *r_h = value_h; value_h = NULL;
+
+ leave:
+ _gcry_mpi_release (prime_p);
+ _gcry_mpi_release (prime_q);
+ _gcry_mpi_release (value_g);
+ _gcry_mpi_release (value_y);
+ _gcry_mpi_release (value_x);
+ _gcry_mpi_release (value_h);
+ _gcry_mpi_release (value_e);
+ _gcry_mpi_release (value_c);
+ _gcry_mpi_release (value_qm2);
+
+ /* As a last step, test the key (this should of course never fail). */
+ if (!ec && test_keys (sk, qbits) )
+ {
+ _gcry_mpi_release (sk->p); sk->p = NULL;
+ _gcry_mpi_release (sk->q); sk->q = NULL;
+ _gcry_mpi_release (sk->g); sk->g = NULL;
+ _gcry_mpi_release (sk->y); sk->y = NULL;
+ _gcry_mpi_release (sk->x); sk->x = NULL;
+ fips_signal_error ("self-test after key generation failed");
+ ec = GPG_ERR_SELFTEST_FAILED;
+ }
+
+ if (ec)
+ {
+ *r_counter = 0;
+ xfree (*r_seed); *r_seed = NULL;
+ *r_seedlen = 0;
+ _gcry_mpi_release (*r_h); *r_h = NULL;
+ }
+
+ return ec;
+}
+
+
+
+/*
+ Test whether the secret key is valid.
+ Returns: true if this is a valid key.
+ */
+static int
+check_secret_key( DSA_secret_key *sk )
+{
+ int rc;
+ gcry_mpi_t y = mpi_alloc( mpi_get_nlimbs(sk->y) );
+
+ mpi_powm( y, sk->g, sk->x, sk->p );
+ rc = !mpi_cmp( y, sk->y );
+ mpi_free( y );
+ return rc;
+}
+
+
+
+/*
+ Make a DSA signature from INPUT and put it into r and s.
+
+ INPUT may either be a plain MPI or an opaque MPI which is then
+ internally converted to a plain MPI. FLAGS and HASHALGO may both
+ be 0 for standard operation mode.
+
+ The return value is 0 on success or an error code. Note that for
+ backward compatibility the function will not return any error if
+ FLAGS and HASHALGO are both 0 and INPUT is a plain MPI.
+ */
+static gpg_err_code_t
+sign (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input, DSA_secret_key *skey,
+ int flags, int hashalgo)
+{
+ gpg_err_code_t rc;
+ gcry_mpi_t hash;
+ gcry_mpi_t k;
+ gcry_mpi_t kinv;
+ gcry_mpi_t tmp;
+ const void *abuf;
+ unsigned int abits, qbits;
+ int extraloops = 0;
+
+ qbits = mpi_get_nbits (skey->q);
+
+ /* Convert the INPUT into an MPI. */
+ rc = _gcry_dsa_normalize_hash (input, &hash, qbits);
+ if (rc)
+ return rc;
+
+ again:
+ /* Create the K value. */
+ if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo)
+ {
+ /* Use Pornin's method for deterministic DSA. If this flag is
+ set, it is expected that INPUT is an opaque MPI holding the
+ hash to be signed. That hash is also used as h1 from step
+ 3.2.a of RFC 6979. */
+ if (!mpi_is_opaque (input))
+ {
+ rc = GPG_ERR_CONFLICT;
+ goto leave;
+ }
+
+ abuf = mpi_get_opaque (input, &abits);
+ rc = _gcry_dsa_gen_rfc6979_k (&k, skey->q, skey->x,
+ abuf, (abits+7)/8, hashalgo, extraloops);
+ if (rc)
+ goto leave;
+ }
+ else
+ {
+ /* Select a random k with 0 < k < q */
+ k = _gcry_dsa_gen_k (skey->q, GCRY_STRONG_RANDOM);
+ }
+
+ /* kinv = k^(-1) mod q */
+ kinv = mpi_alloc( mpi_get_nlimbs(k) );
+ mpi_invm(kinv, k, skey->q );
+
+ _gcry_dsa_modify_k (k, skey->q, qbits);
+
+ /* r = (a^k mod p) mod q */
+ mpi_powm( r, skey->g, k, skey->p );
+ mpi_fdiv_r( r, r, skey->q );
+
+ /* s = (kinv * ( hash + x * r)) mod q */
+ tmp = mpi_alloc( mpi_get_nlimbs(skey->p) );
+ mpi_mul( tmp, skey->x, r );
+ mpi_add( tmp, tmp, hash );
+ mpi_mulm( s , kinv, tmp, skey->q );
+
+ mpi_free(k);
+ mpi_free(kinv);
+ mpi_free(tmp);
+
+ if (!mpi_cmp_ui (r, 0))
+ {
+ /* This is a highly unlikely code path. */
+ extraloops++;
+ goto again;
+ }
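+ /* In the RFC 6979 case the incremented EXTRALOOPS lets the k
+ derivation produce a different deterministic k on the retry;
+ with a random k a fresh value is simply drawn. */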
+
+ rc = 0;
+
+ leave:
+ if (hash != input)
+ mpi_free (hash);
+
+ return rc;
+}
+
+
+/*
+ Returns true if the signature composed from R and S is valid.
+ */
+static gpg_err_code_t
+verify (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input, DSA_public_key *pkey )
+{
+ gpg_err_code_t rc = 0;
+ gcry_mpi_t w, u1, u2, v;
+ gcry_mpi_t base[3];
+ gcry_mpi_t ex[3];
+ gcry_mpi_t hash;
+ unsigned int nbits;
+
+ if( !(mpi_cmp_ui( r, 0 ) > 0 && mpi_cmp( r, pkey->q ) < 0) )
+ return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < n failed. */
+ if( !(mpi_cmp_ui( s, 0 ) > 0 && mpi_cmp( s, pkey->q ) < 0) )
+ return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < n failed. */
+
+ nbits = mpi_get_nbits (pkey->q);
+ rc = _gcry_dsa_normalize_hash (input, &hash, nbits);
+ if (rc)
+ return rc;
+
+ w = mpi_alloc( mpi_get_nlimbs(pkey->q) );
+ u1 = mpi_alloc( mpi_get_nlimbs(pkey->q) );
+ u2 = mpi_alloc( mpi_get_nlimbs(pkey->q) );
+ v = mpi_alloc( mpi_get_nlimbs(pkey->p) );
+
+ /* w = s^(-1) mod q */
+ mpi_invm( w, s, pkey->q );
+
+ /* u1 = (hash * w) mod q */
+ mpi_mulm( u1, hash, w, pkey->q );
+
+ /* u2 = r * w mod q */
+ mpi_mulm( u2, r, w, pkey->q );
+
+ /* v = g^u1 * y^u2 mod p mod q */
+ base[0] = pkey->g; ex[0] = u1;
+ base[1] = pkey->y; ex[1] = u2;
+ base[2] = NULL; ex[2] = NULL;
+ mpi_mulpowm( v, base, ex, pkey->p );
+ mpi_fdiv_r( v, v, pkey->q );
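+ /* For a valid signature s = k^-1 * (hash + x*r) mod q, so
+ u1 + x*u2 = w * (hash + x*r) = k (mod q) and therefore
+ g^u1 * y^u2 = g^k (mod p); hence V must equal R. */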
+
+ if (mpi_cmp( v, r ))
+ {
+ if (DBG_CIPHER)
+ {
+ log_mpidump (" i", input);
+ log_mpidump (" h", hash);
+ log_mpidump (" v", v);
+ log_mpidump (" r", r);
+ log_mpidump (" s", s);
+ }
+ rc = GPG_ERR_BAD_SIGNATURE;
+ }
+
+ mpi_free(w);
+ mpi_free(u1);
+ mpi_free(u2);
+ mpi_free(v);
+ if (hash != input)
+ mpi_free (hash);
+
+ return rc;
+}
+
+
+/*********************************************
+ ************** interface ******************
+ *********************************************/
+
+static gcry_err_code_t
+dsa_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
+{
+ gpg_err_code_t rc;
+ unsigned int nbits;
+ gcry_sexp_t domainsexp;
+ DSA_secret_key sk;
+ gcry_sexp_t l1;
+ unsigned int qbits = 0;
+ gcry_sexp_t deriveparms = NULL;
+ gcry_sexp_t seedinfo = NULL;
+ gcry_sexp_t misc_info = NULL;
+ int flags = 0;
+ dsa_domain_t domain;
+ gcry_mpi_t *factors = NULL;
+
+ memset (&sk, 0, sizeof sk);
+ memset (&domain, 0, sizeof domain);
+
+ rc = _gcry_pk_util_get_nbits (genparms, &nbits);
+ if (rc)
+ return rc;
+
+ /* Parse the optional flags list. */
+ l1 = sexp_find_token (genparms, "flags", 0);
+ if (l1)
+ {
+ rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL);
+ sexp_release (l1);
+ if (rc)
+ return rc;
+ }
+
+ /* Parse the optional qbits element. */
+ l1 = sexp_find_token (genparms, "qbits", 0);
+ if (l1)
+ {
+ char buf[50];
+ const char *s;
+ size_t n;
+
+ s = sexp_nth_data (l1, 1, &n);
+ if (!s || n >= DIM (buf) - 1 )
+ {
+ sexp_release (l1);
+ return GPG_ERR_INV_OBJ; /* No value or value too large. */
+ }
+ memcpy (buf, s, n);
+ buf[n] = 0;
+ qbits = (unsigned int)strtoul (buf, NULL, 0);
+ sexp_release (l1);
+ }
+
+ /* Parse the optional transient-key flag. */
+ if (!(flags & PUBKEY_FLAG_TRANSIENT_KEY))
+ {
+ l1 = sexp_find_token (genparms, "transient-key", 0);
+ if (l1)
+ {
+ flags |= PUBKEY_FLAG_TRANSIENT_KEY;
+ sexp_release (l1);
+ }
+ }
+
+ /* Get the optional derive parameters. */
+ deriveparms = sexp_find_token (genparms, "derive-parms", 0);
+
+ /* Parse the optional "use-fips186" flags. */
+ if (!(flags & PUBKEY_FLAG_USE_FIPS186))
+ {
+ l1 = sexp_find_token (genparms, "use-fips186", 0);
+ if (l1)
+ {
+ flags |= PUBKEY_FLAG_USE_FIPS186;
+ sexp_release (l1);
+ }
+ }
+ if (!(flags & PUBKEY_FLAG_USE_FIPS186_2))
+ {
+ l1 = sexp_find_token (genparms, "use-fips186-2", 0);
+ if (l1)
+ {
+ flags |= PUBKEY_FLAG_USE_FIPS186_2;
+ sexp_release (l1);
+ }
+ }
+
+ /* Check whether domain parameters are given. */
+ domainsexp = sexp_find_token (genparms, "domain", 0);
+ if (domainsexp)
+ {
+ /* DERIVEPARMS can't be used together with domain parameters.
+ NBITS and QBITS may not be specified because their values
+ are derived from the domain parameters. */
+ if (deriveparms || qbits || nbits)
+ {
+ sexp_release (domainsexp);
+ sexp_release (deriveparms);
+ return GPG_ERR_INV_VALUE;
+ }
+
+ /* Put all domain parameters into the domain object. */
+ l1 = sexp_find_token (domainsexp, "p", 0);
+ domain.p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+ sexp_release (l1);
+ l1 = sexp_find_token (domainsexp, "q", 0);
+ domain.q = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+ sexp_release (l1);
+ l1 = sexp_find_token (domainsexp, "g", 0);
+ domain.g = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+ sexp_release (l1);
+ sexp_release (domainsexp);
+
+ /* Check that all domain parameters are available. */
+ if (!domain.p || !domain.q || !domain.g)
+ {
+ _gcry_mpi_release (domain.p);
+ _gcry_mpi_release (domain.q);
+ _gcry_mpi_release (domain.g);
+ sexp_release (deriveparms);
+ return GPG_ERR_MISSING_VALUE;
+ }
+
+ /* Get NBITS and QBITS from the domain parameters. */
+ nbits = mpi_get_nbits (domain.p);
+ qbits = mpi_get_nbits (domain.q);
+ }
+
+ if (deriveparms
+ || (flags & PUBKEY_FLAG_USE_FIPS186)
+ || (flags & PUBKEY_FLAG_USE_FIPS186_2)
+ || fips_mode ())
+ {
+ int counter;
+ void *seed;
+ size_t seedlen;
+ gcry_mpi_t h_value;
+
+ rc = generate_fips186 (&sk, nbits, qbits, deriveparms,
+ !!(flags & PUBKEY_FLAG_USE_FIPS186_2),
+ &domain,
+ &counter, &seed, &seedlen, &h_value);
+ if (!rc && h_value)
+ {
+ /* Format the seed values unless domain parameters were used,
+ which is indicated by an H_VALUE of NULL. */
+ rc = sexp_build (&seedinfo, NULL,
+ "(seed-values(counter %d)(seed %b)(h %m))",
+ counter, (int)seedlen, seed, h_value);
+ xfree (seed);
+ _gcry_mpi_release (h_value);
+ }
+ }
+ else
+ {
+ rc = generate (&sk, nbits, qbits,
+ !!(flags & PUBKEY_FLAG_TRANSIENT_KEY),
+ &domain, &factors);
+ }
+
+ if (!rc)
+ {
+ /* Put the factors into MISC_INFO. Note that the factors are
+ not confidential thus we can store them in standard memory. */
+ int nfactors, i, j;
+ char *p;
+ char *format = NULL;
+ void **arg_list = NULL;
+
+ for (nfactors=0; factors && factors[nfactors]; nfactors++)
+ ;
+ /* Allocate space for the format string:
+ "(misc-key-info%S(pm1-factors%m))"
+ with one "%m" for each factor and construct it. */
+ format = xtrymalloc (50 + 2*nfactors);
+ if (!format)
+ rc = gpg_err_code_from_syserror ();
+ else
+ {
+ p = stpcpy (format, "(misc-key-info");
+ if (seedinfo)
+ p = stpcpy (p, "%S");
+ if (nfactors)
+ {
+ p = stpcpy (p, "(pm1-factors");
+ for (i=0; i < nfactors; i++)
+ p = stpcpy (p, "%m");
+ p = stpcpy (p, ")");
+ }
+ p = stpcpy (p, ")");
+
+ /* Allocate space for the list of factors plus one for the
+ seedinfo s-exp plus an extra NULL entry for safety and
+ fill it with the factors. */
+ arg_list = xtrycalloc (nfactors+1+1, sizeof *arg_list);
+ if (!arg_list)
+ rc = gpg_err_code_from_syserror ();
+ else
+ {
+ i = 0;
+ if (seedinfo)
+ arg_list[i++] = &seedinfo;
+ for (j=0; j < nfactors; j++)
+ arg_list[i++] = factors + j;
+ arg_list[i] = NULL;
+
+ rc = sexp_build_array (&misc_info, NULL, format, arg_list);
+ }
+ }
+
+ xfree (arg_list);
+ xfree (format);
+ }
+
+ if (!rc)
+ rc = sexp_build (r_skey, NULL,
+ "(key-data"
+ " (public-key"
+ " (dsa(p%m)(q%m)(g%m)(y%m)))"
+ " (private-key"
+ " (dsa(p%m)(q%m)(g%m)(y%m)(x%m)))"
+ " %S)",
+ sk.p, sk.q, sk.g, sk.y,
+ sk.p, sk.q, sk.g, sk.y, sk.x,
+ misc_info);
+
+
+ _gcry_mpi_release (sk.p);
+ _gcry_mpi_release (sk.q);
+ _gcry_mpi_release (sk.g);
+ _gcry_mpi_release (sk.y);
+ _gcry_mpi_release (sk.x);
+
+ _gcry_mpi_release (domain.p);
+ _gcry_mpi_release (domain.q);
+ _gcry_mpi_release (domain.g);
+
+ sexp_release (seedinfo);
+ sexp_release (misc_info);
+ sexp_release (deriveparms);
+ if (factors)
+ {
+ gcry_mpi_t *mp;
+ for (mp = factors; *mp; mp++)
+ mpi_free (*mp);
+ xfree (factors);
+ }
+ return rc;
+}
+
+
+
+static gcry_err_code_t
+dsa_check_secret_key (gcry_sexp_t keyparms)
+{
+ gcry_err_code_t rc;
+ DSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL};
+
+ rc = _gcry_sexp_extract_param (keyparms, NULL, "pqgyx",
+ &sk.p, &sk.q, &sk.g, &sk.y, &sk.x,
+ NULL);
+ if (rc)
+ goto leave;
+
+ if (!check_secret_key (&sk))
+ rc = GPG_ERR_BAD_SECKEY;
+
+ leave:
+ _gcry_mpi_release (sk.p);
+ _gcry_mpi_release (sk.q);
+ _gcry_mpi_release (sk.g);
+ _gcry_mpi_release (sk.y);
+ _gcry_mpi_release (sk.x);
+ if (DBG_CIPHER)
+ log_debug ("dsa_testkey => %s\n", gpg_strerror (rc));
+ return rc;
+}
+
+
+static gcry_err_code_t
+dsa_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
+{
+ gcry_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_mpi_t data = NULL;
+ DSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL};
+ gcry_mpi_t sig_r = NULL;
+ gcry_mpi_t sig_s = NULL;
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN,
+ dsa_get_nbits (keyparms));
+
+ /* Extract the data. */
+ rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_mpidump ("dsa_sign data", data);
+
+ /* Extract the key. */
+ rc = _gcry_sexp_extract_param (keyparms, NULL, "pqgyx",
+ &sk.p, &sk.q, &sk.g, &sk.y, &sk.x, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("dsa_sign p", sk.p);
+ log_mpidump ("dsa_sign q", sk.q);
+ log_mpidump ("dsa_sign g", sk.g);
+ log_mpidump ("dsa_sign y", sk.y);
+ if (!fips_mode ())
+ log_mpidump ("dsa_sign x", sk.x);
+ }
+
+ sig_r = mpi_new (0);
+ sig_s = mpi_new (0);
+ rc = sign (sig_r, sig_s, data, &sk, ctx.flags, ctx.hash_algo);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("dsa_sign sig_r", sig_r);
+ log_mpidump ("dsa_sign sig_s", sig_s);
+ }
+ rc = sexp_build (r_sig, NULL, "(sig-val(dsa(r%M)(s%M)))", sig_r, sig_s);
+
+ leave:
+ _gcry_mpi_release (sig_r);
+ _gcry_mpi_release (sig_s);
+ _gcry_mpi_release (sk.p);
+ _gcry_mpi_release (sk.q);
+ _gcry_mpi_release (sk.g);
+ _gcry_mpi_release (sk.y);
+ _gcry_mpi_release (sk.x);
+ _gcry_mpi_release (data);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("dsa_sign => %s\n", gpg_strerror (rc));
+ return rc;
+}
+
+
+static gcry_err_code_t
+dsa_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms)
+{
+ gcry_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_sexp_t l1 = NULL;
+ gcry_mpi_t sig_r = NULL;
+ gcry_mpi_t sig_s = NULL;
+ gcry_mpi_t data = NULL;
+ DSA_public_key pk = { NULL, NULL, NULL, NULL };
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY,
+ dsa_get_nbits (s_keyparms));
+
+ /* Extract the data. */
+ rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_mpidump ("dsa_verify data", data);
+
+ /* Extract the signature value. */
+ rc = _gcry_pk_util_preparse_sigval (s_sig, dsa_names, &l1, NULL);
+ if (rc)
+ goto leave;
+ rc = _gcry_sexp_extract_param (l1, NULL, "rs", &sig_r, &sig_s, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("dsa_verify s_r", sig_r);
+ log_mpidump ("dsa_verify s_s", sig_s);
+ }
+
+ /* Extract the key. */
+ rc = _gcry_sexp_extract_param (s_keyparms, NULL, "pqgy",
+ &pk.p, &pk.q, &pk.g, &pk.y, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("dsa_verify p", pk.p);
+ log_mpidump ("dsa_verify q", pk.q);
+ log_mpidump ("dsa_verify g", pk.g);
+ log_mpidump ("dsa_verify y", pk.y);
+ }
+
+ /* Verify the signature. */
+ rc = verify (sig_r, sig_s, data, &pk);
+
+ leave:
+ _gcry_mpi_release (pk.p);
+ _gcry_mpi_release (pk.q);
+ _gcry_mpi_release (pk.g);
+ _gcry_mpi_release (pk.y);
+ _gcry_mpi_release (data);
+ _gcry_mpi_release (sig_r);
+ _gcry_mpi_release (sig_s);
+ sexp_release (l1);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("dsa_verify => %s\n", rc?gpg_strerror (rc):"Good");
+ return rc;
+}
+
+
+/* Return the number of bits for the key described by PARMS. On error
+ * 0 is returned. The format of PARMS starts with the algorithm name;
+ * for example:
+ *
+ * (dsa
+ * (p <mpi>)
+ * (q <mpi>)
+ * (g <mpi>)
+ * (y <mpi>))
+ *
+ * More parameters may be given but we only need P here.
+ */
+static unsigned int
+dsa_get_nbits (gcry_sexp_t parms)
+{
+ gcry_sexp_t l1;
+ gcry_mpi_t p;
+ unsigned int nbits;
+
+ l1 = sexp_find_token (parms, "p", 1);
+ if (!l1)
+ return 0; /* Parameter P not found. */
+
+ p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+ sexp_release (l1);
+ nbits = p? mpi_get_nbits (p) : 0;
+ _gcry_mpi_release (p);
+ return nbits;
+}
+
+
+
+/*
+ Self-test section.
+ */
+
+static const char *
+selftest_sign (gcry_sexp_t pkey, gcry_sexp_t skey)
+{
+ /* Sample data from RFC 6979 section A.2.2, hash is of message "sample" */
+ static const char sample_data[] =
+ "(data (flags rfc6979)"
+ " (hash sha256 #af2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e9891562113d8a62add1bf#))";
+ static const char sample_data_bad[] =
+ "(data (flags rfc6979)"
+ " (hash sha256 #bf2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e9891562113d8a62add1bf#))";
+ static const char signature_r[] =
+ "eace8bdbbe353c432a795d9ec556c6d021f7a03f42c36e9bc87e4ac7932cc809";
+ static const char signature_s[] =
+ "7081e175455f9247b812b74583e9e94f9ea79bd640dc962533b0680793a38d53";
+
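+ /* Because the rfc6979 flag makes the nonce k a deterministic
+ function of the private key and the message hash, the signature
+ computed below is reproducible and can be compared against the
+ fixed R and S values above. */
+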
+ const char *errtxt = NULL;
+ gcry_error_t err;
+ gcry_sexp_t data = NULL;
+ gcry_sexp_t data_bad = NULL;
+ gcry_sexp_t sig = NULL;
+ gcry_sexp_t l1 = NULL;
+ gcry_sexp_t l2 = NULL;
+ gcry_mpi_t r = NULL;
+ gcry_mpi_t s = NULL;
+ gcry_mpi_t calculated_r = NULL;
+ gcry_mpi_t calculated_s = NULL;
+ int cmp;
+
+ err = sexp_sscan (&data, NULL, sample_data, strlen (sample_data));
+ if (!err)
+ err = sexp_sscan (&data_bad, NULL,
+ sample_data_bad, strlen (sample_data_bad));
+ if (!err)
+ err = _gcry_mpi_scan (&r, GCRYMPI_FMT_HEX, signature_r, 0, NULL);
+ if (!err)
+ err = _gcry_mpi_scan (&s, GCRYMPI_FMT_HEX, signature_s, 0, NULL);
+
+ if (err)
+ {
+ errtxt = "converting data failed";
+ goto leave;
+ }
+
+ err = _gcry_pk_sign (&sig, data, skey);
+ if (err)
+ {
+ errtxt = "signing failed";
+ goto leave;
+ }
+
+ /* check against known signature */
+ errtxt = "signature validity failed";
+ l1 = _gcry_sexp_find_token (sig, "sig-val", 0);
+ if (!l1)
+ goto leave;
+ l2 = _gcry_sexp_find_token (l1, "dsa", 0);
+ if (!l2)
+ goto leave;
+
+ sexp_release (l1);
+ l1 = l2;
+
+ l2 = _gcry_sexp_find_token (l1, "r", 0);
+ if (!l2)
+ goto leave;
+ calculated_r = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG);
+ if (!calculated_r)
+ goto leave;
+
+ sexp_release (l2);
+ l2 = _gcry_sexp_find_token (l1, "s", 0);
+ if (!l2)
+ goto leave;
+ calculated_s = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG);
+ if (!calculated_s)
+ goto leave;
+
+ errtxt = "known sig check failed";
+
+ cmp = _gcry_mpi_cmp (r, calculated_r);
+ if (cmp)
+ goto leave;
+ cmp = _gcry_mpi_cmp (s, calculated_s);
+ if (cmp)
+ goto leave;
+
+ errtxt = NULL;
+
+
+ err = _gcry_pk_verify (sig, data, pkey);
+ if (err)
+ {
+ errtxt = "verify failed";
+ goto leave;
+ }
+ err = _gcry_pk_verify (sig, data_bad, pkey);
+ if (gcry_err_code (err) != GPG_ERR_BAD_SIGNATURE)
+ {
+ errtxt = "bad signature not detected";
+ goto leave;
+ }
+
+
+ leave:
+ _gcry_mpi_release (calculated_s);
+ _gcry_mpi_release (calculated_r);
+ _gcry_mpi_release (s);
+ _gcry_mpi_release (r);
+ sexp_release (l2);
+ sexp_release (l1);
+ sexp_release (sig);
+ sexp_release (data_bad);
+ sexp_release (data);
+ return errtxt;
+}
+
+
+static gpg_err_code_t
+selftests_dsa_2048 (selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+ gcry_error_t err;
+ gcry_sexp_t skey = NULL;
+ gcry_sexp_t pkey = NULL;
+
+ /* Convert the S-expressions into the internal representation. */
+ what = "convert";
+ err = sexp_sscan (&skey, NULL, sample_secret_key_2048, strlen (sample_secret_key_2048));
+ if (!err)
+ err = sexp_sscan (&pkey, NULL,
+ sample_public_key_2048, strlen (sample_public_key_2048));
+ if (err)
+ {
+ errtxt = _gcry_strerror (err);
+ goto failed;
+ }
+
+ what = "key consistency";
+ err = _gcry_pk_testkey (skey);
+ if (err)
+ {
+ errtxt = _gcry_strerror (err);
+ goto failed;
+ }
+
+ what = "sign";
+ errtxt = selftest_sign (pkey, skey);
+ if (errtxt)
+ goto failed;
+
+ sexp_release (pkey);
+ sexp_release (skey);
+ return 0; /* Succeeded. */
+
+ failed:
+ sexp_release (pkey);
+ sexp_release (skey);
+ if (report)
+ report ("pubkey", GCRY_PK_DSA, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Run a full self-test for ALGO and return 0 on success. */
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+ gpg_err_code_t ec;
+
+ (void)extended;
+
+ switch (algo)
+ {
+ case GCRY_PK_DSA:
+ ec = selftests_dsa_2048 (report);
+ break;
+ default:
+ ec = GPG_ERR_PUBKEY_ALGO;
+ break;
+
+ }
+ return ec;
+}
+
+
+
+gcry_pk_spec_t _gcry_pubkey_spec_dsa =
+ {
+ GCRY_PK_DSA, { 0, 1 },
+ GCRY_PK_USAGE_SIGN,
+ "DSA", dsa_names,
+ "pqgy", "pqgyx", "", "rs", "pqgy",
+ dsa_generate,
+ dsa_check_secret_key,
+ NULL,
+ NULL,
+ dsa_sign,
+ dsa_verify,
+ dsa_get_nbits,
+ run_selftests
+ };
diff --git a/comm/third_party/libgcrypt/cipher/ecc-common.h b/comm/third_party/libgcrypt/cipher/ecc-common.h
new file mode 100644
index 0000000000..25c3111263
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/ecc-common.h
@@ -0,0 +1,140 @@
+/* ecc-common.h - Declarations of common ECC code
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ECC_COMMON_H
+#define GCRY_ECC_COMMON_H
+
+
+/* Definition of a curve. */
+typedef struct
+{
+ enum gcry_mpi_ec_models model;/* The model describing this curve. */
+ enum ecc_dialects dialect; /* The dialect used with the curve. */
+ gcry_mpi_t p; /* Prime specifying the field GF(p). */
+ gcry_mpi_t a; /* First coefficient of the Weierstrass equation. */
+ gcry_mpi_t b; /* Second coefficient of the Weierstrass equation.
+ or d as used by Twisted Edwards curves. */
+ mpi_point_struct G; /* Base point (generator). */
+ gcry_mpi_t n; /* Order of G. */
+ unsigned int h; /* Cofactor. */
+ const char *name; /* Name of the curve or NULL. */
+} elliptic_curve_t;
+
+
+
+/* Set the value from S into D. */
+static inline void
+point_set (mpi_point_t d, mpi_point_t s)
+{
+ mpi_set (d->x, s->x);
+ mpi_set (d->y, s->y);
+ mpi_set (d->z, s->z);
+}
+
+#define point_init(a) _gcry_mpi_point_init ((a))
+#define point_free(a) _gcry_mpi_point_free_parts ((a))
+
+
+/*-- ecc-curves.c --*/
+gpg_err_code_t _gcry_ecc_fill_in_curve (unsigned int nbits,
+ const char *name,
+ elliptic_curve_t *curve,
+ unsigned int *r_nbits);
+gpg_err_code_t _gcry_ecc_update_curve_param (const char *name,
+ enum gcry_mpi_ec_models *model,
+ enum ecc_dialects *dialect,
+ gcry_mpi_t *p, gcry_mpi_t *a,
+ gcry_mpi_t *b, gcry_mpi_t *g,
+ gcry_mpi_t *n);
+
+const char *_gcry_ecc_get_curve (gcry_sexp_t keyparms,
+ int iterator,
+ unsigned int *r_nbits);
+gcry_sexp_t _gcry_ecc_get_param_sexp (const char *name);
+
+/*-- ecc-misc.c --*/
+void _gcry_ecc_curve_free (elliptic_curve_t *E);
+elliptic_curve_t _gcry_ecc_curve_copy (elliptic_curve_t E);
+const char *_gcry_ecc_model2str (enum gcry_mpi_ec_models model);
+const char *_gcry_ecc_dialect2str (enum ecc_dialects dialect);
+gcry_mpi_t _gcry_ecc_ec2os (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t p);
+
+mpi_point_t _gcry_ecc_compute_public (mpi_point_t Q, mpi_ec_t ec);
+gpg_err_code_t _gcry_ecc_mont_encodepoint (gcry_mpi_t x, unsigned int nbits,
+ int with_prefix,
+ unsigned char **r_buffer,
+ unsigned int *r_buflen);
+
+
+/*-- ecc.c --*/
+
+/*-- ecc-ecdsa.c --*/
+gpg_err_code_t _gcry_ecc_ecdsa_sign (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r, gcry_mpi_t s,
+ int flags, int hashalgo);
+gpg_err_code_t _gcry_ecc_ecdsa_verify (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r, gcry_mpi_t s);
+
+/*-- ecc-eddsa.c --*/
+gpg_err_code_t _gcry_ecc_eddsa_recover_x (gcry_mpi_t x, gcry_mpi_t y, int sign,
+ mpi_ec_t ec);
+gpg_err_code_t _gcry_ecc_eddsa_encodepoint (mpi_point_t point, mpi_ec_t ctx,
+ gcry_mpi_t x, gcry_mpi_t y,
+ int with_prefix,
+ unsigned char **r_buffer,
+ unsigned int *r_buflen);
+gpg_err_code_t _gcry_ecc_eddsa_ensure_compact (gcry_mpi_t value,
+ unsigned int nbits);
+
+
+gpg_err_code_t _gcry_ecc_eddsa_compute_h_d (unsigned char **r_digest,
+ mpi_ec_t ec);
+
+gpg_err_code_t _gcry_ecc_eddsa_genkey (mpi_ec_t ec, int flags);
+gpg_err_code_t _gcry_ecc_eddsa_sign (gcry_mpi_t input,
+ mpi_ec_t ec,
+ gcry_mpi_t r_r, gcry_mpi_t s,
+ struct pk_encoding_ctx *ctx);
+gpg_err_code_t _gcry_ecc_eddsa_verify (gcry_mpi_t input,
+ mpi_ec_t ec,
+ gcry_mpi_t r, gcry_mpi_t s,
+ struct pk_encoding_ctx *ctx);
+void reverse_buffer (unsigned char *buffer, unsigned int length);
+
+
+/*-- ecc-gost.c --*/
+gpg_err_code_t _gcry_ecc_gost_sign (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r, gcry_mpi_t s);
+gpg_err_code_t _gcry_ecc_gost_verify (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r, gcry_mpi_t s);
+
+
+/*-- ecc-sm2.c --*/
+gpg_err_code_t _gcry_ecc_sm2_encrypt (gcry_sexp_t *r_ciph,
+ gcry_mpi_t input, mpi_ec_t ec);
+gpg_err_code_t _gcry_ecc_sm2_decrypt (gcry_sexp_t *r_plain,
+ gcry_sexp_t data_list, mpi_ec_t ec);
+gpg_err_code_t _gcry_ecc_sm2_sign (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r, gcry_mpi_t s,
+ int flags, int hashalgo);
+gpg_err_code_t _gcry_ecc_sm2_verify (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r, gcry_mpi_t s);
+
+
+#endif /*GCRY_ECC_COMMON_H*/
diff --git a/comm/third_party/libgcrypt/cipher/ecc-curves.c b/comm/third_party/libgcrypt/cipher/ecc-curves.c
new file mode 100644
index 0000000000..900b668aac
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/ecc-curves.c
@@ -0,0 +1,1603 @@
+/* ecc-curves.c - Elliptic Curve parameter management
+ * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "mpi-internal.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "pubkey-internal.h"
+#include "ecc-common.h"
+
+
+static gpg_err_code_t
+point_from_keyparam (gcry_mpi_point_t *r_a,
+ gcry_sexp_t keyparam, const char *name, mpi_ec_t ec);
+
+/* This table defines aliases for curve names. */
+static const struct
+{
+ const char *name; /* Our name. */
+ const char *other; /* Other name. */
+} curve_aliases[] =
+ {
+ { "Ed25519", "1.3.6.1.4.1.11591.15.1" }, /* OpenPGP */
+ { "Ed25519", "1.3.101.112" }, /* rfc8410 */
+
+ { "Curve25519", "1.3.6.1.4.1.3029.1.5.1" }, /* OpenPGP */
+ { "Curve25519", "1.3.101.110" }, /* rfc8410 */
+ { "Curve25519", "X25519" }, /* rfc8410 */
+
+ { "Ed448", "1.3.101.113" }, /* rfc8410 */
+ { "X448", "1.3.101.111" }, /* rfc8410 */
+
+ { "NIST P-192", "1.2.840.10045.3.1.1" }, /* X9.62 OID */
+ { "NIST P-192", "prime192v1" }, /* X9.62 name. */
+ { "NIST P-192", "secp192r1" }, /* SECP name. */
+ { "NIST P-192", "nistp192" }, /* rfc5656. */
+
+ { "NIST P-224", "secp224r1" },
+ { "NIST P-224", "1.3.132.0.33" }, /* SECP OID. */
+ { "NIST P-224", "nistp224" }, /* rfc5656. */
+
+ { "NIST P-256", "1.2.840.10045.3.1.7" }, /* From NIST SP 800-78-1. */
+ { "NIST P-256", "prime256v1" },
+ { "NIST P-256", "secp256r1" },
+ { "NIST P-256", "nistp256" }, /* rfc5656. */
+
+ { "NIST P-384", "secp384r1" },
+ { "NIST P-384", "1.3.132.0.34" },
+ { "NIST P-384", "nistp384" }, /* rfc5656. */
+
+ { "NIST P-521", "secp521r1" },
+ { "NIST P-521", "1.3.132.0.35" },
+ { "NIST P-521", "nistp521" }, /* rfc5656. */
+
+ { "brainpoolP160r1", "1.3.36.3.3.2.8.1.1.1" },
+ { "brainpoolP192r1", "1.3.36.3.3.2.8.1.1.3" },
+ { "brainpoolP224r1", "1.3.36.3.3.2.8.1.1.5" },
+ { "brainpoolP256r1", "1.3.36.3.3.2.8.1.1.7" },
+ { "brainpoolP320r1", "1.3.36.3.3.2.8.1.1.9" },
+ { "brainpoolP384r1", "1.3.36.3.3.2.8.1.1.11"},
+ { "brainpoolP512r1", "1.3.36.3.3.2.8.1.1.13"},
+
+ { "GOST2001-test", "1.2.643.2.2.35.0" },
+ { "GOST2001-CryptoPro-A", "1.2.643.2.2.35.1" },
+ { "GOST2001-CryptoPro-B", "1.2.643.2.2.35.2" },
+ { "GOST2001-CryptoPro-C", "1.2.643.2.2.35.3" },
+ { "GOST2001-CryptoPro-A", "GOST2001-CryptoPro-XchA" },
+ { "GOST2001-CryptoPro-C", "GOST2001-CryptoPro-XchB" },
+ { "GOST2001-CryptoPro-A", "1.2.643.2.2.36.0" },
+ { "GOST2001-CryptoPro-C", "1.2.643.2.2.36.1" },
+
+ { "GOST2012-256-tc26-A", "1.2.643.7.1.2.1.1.1" },
+ { "GOST2001-CryptoPro-A", "1.2.643.7.1.2.1.1.2" },
+ { "GOST2001-CryptoPro-A", "GOST2012-256-tc26-B" },
+ { "GOST2001-CryptoPro-B", "1.2.643.7.1.2.1.1.3" },
+ { "GOST2001-CryptoPro-B", "GOST2012-256-tc26-C" },
+ { "GOST2001-CryptoPro-C", "1.2.643.7.1.2.1.1.4" },
+ { "GOST2001-CryptoPro-C", "GOST2012-256-tc26-D" },
+
+ { "GOST2012-512-test", "GOST2012-test" },
+ { "GOST2012-512-test", "1.2.643.7.1.2.1.2.0" },
+ { "GOST2012-512-tc26-A", "GOST2012-tc26-A" },
+ { "GOST2012-512-tc26-B", "GOST2012-tc26-B" },
+ { "GOST2012-512-tc26-A", "1.2.643.7.1.2.1.2.1" },
+ { "GOST2012-512-tc26-B", "1.2.643.7.1.2.1.2.2" },
+ { "GOST2012-512-tc26-C", "1.2.643.7.1.2.1.2.3" },
+
+ { "secp256k1", "1.3.132.0.10" },
+
+ { "sm2p256v1", "1.2.156.10197.1.301" },
+
+ { NULL, NULL}
+ };
+
+
+typedef struct
+{
+ const char *desc; /* Description of the curve. */
+ unsigned int nbits; /* Number of bits. */
+ unsigned int fips:1; /* True if this is a FIPS140-2 approved curve. */
+
+ /* The model describing this curve. This is mainly used to select
+ the group equation. */
+ enum gcry_mpi_ec_models model;
+
+ /* The actual ECC dialect used. This is used for curve specific
+ optimizations and to select encodings etc. */
+ enum ecc_dialects dialect;
+
+ const char *p; /* The prime defining the field. */
+ const char *a, *b; /* The coefficients. For Twisted Edwards
+ Curves b is used for d. For Montgomery
+ Curves (a,b) holds ((A-2)/4, B^-1). */
+ const char *n; /* The order of the base point. */
+ const char *g_x, *g_y; /* Base point. */
+ unsigned int h; /* Cofactor. */
+} ecc_domain_parms_t;
+
+
+/* This static table defines all available curves. */
+static const ecc_domain_parms_t domain_parms[] =
+ {
+ {
+ /* (-x^2 + y^2 = 1 + dx^2y^2) */
+ "Ed25519", 255, 0,
+ MPI_EC_EDWARDS, ECC_DIALECT_ED25519,
+ "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED",
+ "-0x01",
+ "-0x2DFC9311D490018C7338BF8688861767FF8FF5B2BEBE27548A14B235ECA6874A",
+ "0x1000000000000000000000000000000014DEF9DEA2F79CD65812631A5CF5D3ED",
+ "0x216936D3CD6E53FEC0A4E231FDD6DC5C692CC7609525A7B2C9562D608F25D51A",
+ "0x6666666666666666666666666666666666666666666666666666666666666658",
+ 8
+ },
+ {
+ /* (y^2 = x^3 + 486662*x^2 + x) */
+ "Curve25519", 255, 0,
+ MPI_EC_MONTGOMERY, ECC_DIALECT_STANDARD,
+ "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED",
+ "0x01DB41",
+ "0x01",
+ "0x1000000000000000000000000000000014DEF9DEA2F79CD65812631A5CF5D3ED",
+ "0x0000000000000000000000000000000000000000000000000000000000000009",
+ "0x20AE19A1B8A086B4E01EDD2C7748D14C923D4D7E6D7C61B229E9C5A27ECED3D9",
+ 8
+ /* Note: As per RFC-7748 errata eid4730 the g_y value should be
+ * "0x5F51E65E475F794B1FE122D388B72EB36DC2B28192839E4DD6163A5D81312C14"
+ * but that breaks the keygrip. The new value is recovered in
+ * the function _gcry_ecc_fill_in_curve. See bug #4712.
+ */
+ },
+ {
+ /* (x^2 + y^2 = 1 + dx^2y^2) */
+ "Ed448", 448, 0,
+ MPI_EC_EDWARDS, ECC_DIALECT_SAFECURVE,
+ "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE"
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
+ "0x01",
+ "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE"
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF6756",
+ "0x3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
+ "7CCA23E9C44EDB49AED63690216CC2728DC58F552378C292AB5844F3",
+ "0x4F1970C66BED0DED221D15A622BF36DA9E146570470F1767EA6DE324"
+ "A3D3A46412AE1AF72AB66511433B80E18B00938E2626A82BC70CC05E",
+ "0x693F46716EB6BC248876203756C9C7624BEA73736CA3984087789C1E"
+ "05A0C2D73AD3FF1CE67C39C4FDBD132C4ED7C8AD9808795BF230FA14",
+ 4,
+ },
+ {
+ /* (y^2 = x^3 + 156326*x^2 + x) */
+ "X448", 448, 0,
+ MPI_EC_MONTGOMERY, ECC_DIALECT_SAFECURVE,
+ "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE"
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
+ "0x98A9",
+ "0x01",
+ "0x3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
+ "7CCA23E9C44EDB49AED63690216CC2728DC58F552378C292AB5844F3",
+ "0x00000000000000000000000000000000000000000000000000000000"
+ "00000000000000000000000000000000000000000000000000000005",
+ "0x7D235D1295F5B1F66C98AB6E58326FCECBAE5D34F55545D060F75DC2"
+ "8DF3F6EDB8027E2346430D211312C4B150677AF76FD7223D457B5B1A",
+ 4,
+ },
+#if 0 /* No real specs yet found. */
+ {
+ /* x^2 + y^2 = 1 + 3617x^2y^2 mod 2^414 - 17 */
+ "Curve3617",
+ "0x3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEF",
+ MPI_EC_EDWARDS, 0,
+ "0x01",
+ "0x0e21",
+ "0x07FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEB3CC92414CF"
+ "706022B36F1C0338AD63CF181B0E71A5E106AF79",
+ "0x1A334905141443300218C0631C326E5FCD46369F44C03EC7F57FF35498A4AB4D"
+ "6D6BA111301A73FAA8537C64C4FD3812F3CBC595",
+ "0x22",
+ 8
+ },
+#endif /*0*/
+ {
+ "NIST P-192", 192, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xfffffffffffffffffffffffffffffffeffffffffffffffff",
+ "0xfffffffffffffffffffffffffffffffefffffffffffffffc",
+ "0x64210519e59c80e70fa7e9ab72243049feb8deecc146b9b1",
+ "0xffffffffffffffffffffffff99def836146bc9b1b4d22831",
+
+ "0x188da80eb03090f67cbf20eb43a18800f4ff0afd82ff1012",
+ "0x07192b95ffc8da78631011ed6b24cdd573f977a11e794811",
+ 1
+ },
+ {
+ "NIST P-224", 224, 1,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xffffffffffffffffffffffffffffffff000000000000000000000001",
+ "0xfffffffffffffffffffffffffffffffefffffffffffffffffffffffe",
+ "0xb4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4",
+ "0xffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3d" ,
+
+ "0xb70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21",
+ "0xbd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34",
+ 1
+ },
+ {
+ "NIST P-256", 256, 1,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff",
+ "0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc",
+ "0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b",
+ "0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551",
+
+ "0x6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296",
+ "0x4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5",
+ 1
+ },
+ {
+ "NIST P-384", 384, 1,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe"
+ "ffffffff0000000000000000ffffffff",
+ "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe"
+ "ffffffff0000000000000000fffffffc",
+ "0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875a"
+ "c656398d8a2ed19d2a85c8edd3ec2aef",
+ "0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf"
+ "581a0db248b0a77aecec196accc52973",
+
+ "0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a38"
+ "5502f25dbf55296c3a545e3872760ab7",
+ "0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c0"
+ "0a60b1ce1d7e819d7a431d7c90ea0e5f",
+ 1
+ },
+ {
+ "NIST P-521", 521, 1,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+ "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffc",
+ "0x051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef10"
+ "9e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00",
+ "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ "fffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409",
+
+ "0x00c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d"
+ "3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66",
+ "0x011839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e"
+ "662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650",
+ 1
+ },
+
+ { "brainpoolP160r1", 160, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xe95e4a5f737059dc60dfc7ad95b3d8139515620f",
+ "0x340e7be2a280eb74e2be61bada745d97e8f7c300",
+ "0x1e589a8595423412134faa2dbdec95c8d8675e58",
+ "0xe95e4a5f737059dc60df5991d45029409e60fc09",
+ "0xbed5af16ea3f6a4f62938c4631eb5af7bdbcdbc3",
+ "0x1667cb477a1a8ec338f94741669c976316da6321",
+ 1
+ },
+
+ { "brainpoolP192r1", 192, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xc302f41d932a36cda7a3463093d18db78fce476de1a86297",
+ "0x6a91174076b1e0e19c39c031fe8685c1cae040e5c69a28ef",
+ "0x469a28ef7c28cca3dc721d044f4496bcca7ef4146fbf25c9",
+ "0xc302f41d932a36cda7a3462f9e9e916b5be8f1029ac4acc1",
+ "0xc0a0647eaab6a48753b033c56cb0f0900a2f5c4853375fd6",
+ "0x14b690866abd5bb88b5f4828c1490002e6773fa2fa299b8f",
+ 1
+ },
+
+ { "brainpoolP224r1", 224, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xd7c134aa264366862a18302575d1d787b09f075797da89f57ec8c0ff",
+ "0x68a5e62ca9ce6c1c299803a6c1530b514e182ad8b0042a59cad29f43",
+ "0x2580f63ccfe44138870713b1a92369e33e2135d266dbb372386c400b",
+ "0xd7c134aa264366862a18302575d0fb98d116bc4b6ddebca3a5a7939f",
+ "0x0d9029ad2c7e5cf4340823b2a87dc68c9e4ce3174c1e6efdee12c07d",
+ "0x58aa56f772c0726f24c6b89e4ecdac24354b9e99caa3f6d3761402cd",
+ 1
+ },
+
+ { "brainpoolP256r1", 256, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xa9fb57dba1eea9bc3e660a909d838d726e3bf623d52620282013481d1f6e5377",
+ "0x7d5a0975fc2c3057eef67530417affe7fb8055c126dc5c6ce94a4b44f330b5d9",
+ "0x26dc5c6ce94a4b44f330b5d9bbd77cbf958416295cf7e1ce6bccdc18ff8c07b6",
+ "0xa9fb57dba1eea9bc3e660a909d838d718c397aa3b561a6f7901e0e82974856a7",
+ "0x8bd2aeb9cb7e57cb2c4b482ffc81b7afb9de27e1e3bd23c23a4453bd9ace3262",
+ "0x547ef835c3dac4fd97f8461a14611dc9c27745132ded8e545c1d54c72f046997",
+ 1
+ },
+
+ { "brainpoolP320r1", 320, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xd35e472036bc4fb7e13c785ed201e065f98fcfa6f6f40def4f92b9ec7893ec28"
+ "fcd412b1f1b32e27",
+ "0x3ee30b568fbab0f883ccebd46d3f3bb8a2a73513f5eb79da66190eb085ffa9f4"
+ "92f375a97d860eb4",
+ "0x520883949dfdbc42d3ad198640688a6fe13f41349554b49acc31dccd88453981"
+ "6f5eb4ac8fb1f1a6",
+ "0xd35e472036bc4fb7e13c785ed201e065f98fcfa5b68f12a32d482ec7ee8658e9"
+ "8691555b44c59311",
+ "0x43bd7e9afb53d8b85289bcc48ee5bfe6f20137d10a087eb6e7871e2a10a599c7"
+ "10af8d0d39e20611",
+ "0x14fdd05545ec1cc8ab4093247f77275e0743ffed117182eaa9c77877aaac6ac7"
+ "d35245d1692e8ee1",
+ 1
+ },
+
+ { "brainpoolP384r1", 384, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0x8cb91e82a3386d280f5d6f7e50e641df152f7109ed5456b412b1da197fb71123"
+ "acd3a729901d1a71874700133107ec53",
+ "0x7bc382c63d8c150c3c72080ace05afa0c2bea28e4fb22787139165efba91f90f"
+ "8aa5814a503ad4eb04a8c7dd22ce2826",
+ "0x04a8c7dd22ce28268b39b55416f0447c2fb77de107dcd2a62e880ea53eeb62d5"
+ "7cb4390295dbc9943ab78696fa504c11",
+ "0x8cb91e82a3386d280f5d6f7e50e641df152f7109ed5456b31f166e6cac0425a7"
+ "cf3ab6af6b7fc3103b883202e9046565",
+ "0x1d1c64f068cf45ffa2a63a81b7c13f6b8847a3e77ef14fe3db7fcafe0cbd10e8"
+ "e826e03436d646aaef87b2e247d4af1e",
+ "0x8abe1d7520f9c2a45cb1eb8e95cfd55262b70b29feec5864e19c054ff9912928"
+ "0e4646217791811142820341263c5315",
+ 1
+ },
+
+ { "brainpoolP512r1", 512, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xaadd9db8dbe9c48b3fd4e6ae33c9fc07cb308db3b3c9d20ed6639cca70330871"
+ "7d4d9b009bc66842aecda12ae6a380e62881ff2f2d82c68528aa6056583a48f3",
+ "0x7830a3318b603b89e2327145ac234cc594cbdd8d3df91610a83441caea9863bc"
+ "2ded5d5aa8253aa10a2ef1c98b9ac8b57f1117a72bf2c7b9e7c1ac4d77fc94ca",
+ "0x3df91610a83441caea9863bc2ded5d5aa8253aa10a2ef1c98b9ac8b57f1117a7"
+ "2bf2c7b9e7c1ac4d77fc94cadc083e67984050b75ebae5dd2809bd638016f723",
+ "0xaadd9db8dbe9c48b3fd4e6ae33c9fc07cb308db3b3c9d20ed6639cca70330870"
+ "553e5c414ca92619418661197fac10471db1d381085ddaddb58796829ca90069",
+ "0x81aee4bdd82ed9645a21322e9c4c6a9385ed9f70b5d916c1b43b62eef4d0098e"
+ "ff3b1f78e2d0d48d50d1687b93b97d5f7c6d5047406a5e688b352209bcb9f822",
+ "0x7dde385d566332ecc0eabfa9cf7822fdf209f70024a57b1aa000c55b881f8111"
+ "b2dcde494a5f485e5bca4bd88a2763aed1ca2b2fa8f0540678cd1e0f3ad80892",
+ 1
+ },
+ {
+ "GOST2001-test", 256, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0x8000000000000000000000000000000000000000000000000000000000000431",
+ "0x0000000000000000000000000000000000000000000000000000000000000007",
+ "0x5fbff498aa938ce739b8e022fbafef40563f6e6a3472fc2a514c0ce9dae23b7e",
+ "0x8000000000000000000000000000000150fe8a1892976154c59cfc193accf5b3",
+
+ "0x0000000000000000000000000000000000000000000000000000000000000002",
+ "0x08e2a8a0e65147d4bd6316030e16d19c85c97f0a9ca267122b96abbcea7e8fc8",
+ 1
+ },
+ {
+ "GOST2001-CryptoPro-A", 256, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd97",
+ "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd94",
+ "0x00000000000000000000000000000000000000000000000000000000000000a6",
+ "0xffffffffffffffffffffffffffffffff6c611070995ad10045841b09b761b893",
+ "0x0000000000000000000000000000000000000000000000000000000000000001",
+ "0x8d91e471e0989cda27df505a453f2b7635294f2ddf23e3b122acc99c9e9f1e14",
+ 1
+ },
+ {
+ "GOST2001-CryptoPro-B", 256, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0x8000000000000000000000000000000000000000000000000000000000000c99",
+ "0x8000000000000000000000000000000000000000000000000000000000000c96",
+ "0x3e1af419a269a5f866a7d3c25c3df80ae979259373ff2b182f49d4ce7e1bbc8b",
+ "0x800000000000000000000000000000015f700cfff1a624e5e497161bcc8a198f",
+ "0x0000000000000000000000000000000000000000000000000000000000000001",
+ "0x3fa8124359f96680b83d1c3eb2c070e5c545c9858d03ecfb744bf8d717717efc",
+ 1
+ },
+ {
+ "GOST2001-CryptoPro-C", 256, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0x9b9f605f5a858107ab1ec85e6b41c8aacf846e86789051d37998f7b9022d759b",
+ "0x9b9f605f5a858107ab1ec85e6b41c8aacf846e86789051d37998f7b9022d7598",
+ "0x000000000000000000000000000000000000000000000000000000000000805a",
+ "0x9b9f605f5a858107ab1ec85e6b41c8aa582ca3511eddfb74f02f3a6598980bb9",
+ "0x0000000000000000000000000000000000000000000000000000000000000000",
+ "0x41ece55743711a8c3cbf3783cd08c0ee4d4dc440d4641a8f366e550dfdb3bb67",
+ 1
+ },
+ {
+ "GOST2012-256-A", 256, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd97",
+ "0xc2173f1513981673af4892c23035a27ce25e2013bf95aa33b22c656f277e7335",
+ "0x295f9bae7428ed9ccc20e7c359a9d41a22fccd9108e17bf7ba9337a6f8ae9513",
+ "0x400000000000000000000000000000000fd8cddfc87b6635c115af556c360c67",
+ "0x91e38443a5e82c0d880923425712b2bb658b9196932e02c78b2582fe742daa28",
+ "0x32879423ab1a0375895786c4bb46e9565fde0b5344766740af268adb32322e5c",
+ 4
+ },
+ {
+ "GOST2012-512-test", 511, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0x4531acd1fe0023c7550d267b6b2fee80922b14b2ffb90f04d4eb7c09b5d2d15d"
+ "f1d852741af4704a0458047e80e4546d35b8336fac224dd81664bbf528be6373",
+ "0x0000000000000000000000000000000000000000000000000000000000000007",
+ "0x1cff0806a31116da29d8cfa54e57eb748bc5f377e49400fdd788b649eca1ac4"
+ "361834013b2ad7322480a89ca58e0cf74bc9e540c2add6897fad0a3084f302adc",
+ "0x4531acd1fe0023c7550d267b6b2fee80922b14b2ffb90f04d4eb7c09b5d2d15d"
+ "a82f2d7ecb1dbac719905c5eecc423f1d86e25edbe23c595d644aaf187e6e6df",
+
+ "0x24d19cc64572ee30f396bf6ebbfd7a6c5213b3b3d7057cc825f91093a68cd762"
+ "fd60611262cd838dc6b60aa7eee804e28bc849977fac33b4b530f1b120248a9a",
+ "0x2bb312a43bd2ce6e0d020613c857acddcfbf061e91e5f2c3f32447c259f39b2"
+ "c83ab156d77f1496bf7eb3351e1ee4e43dc1a18b91b24640b6dbb92cb1add371e",
+ 1
+ },
+ {
+ "GOST2012-512-tc26-A", 512, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffdc7",
+ "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffdc4",
+ "0xe8c2505dedfc86ddc1bd0b2b6667f1da34b82574761cb0e879bd081cfd0b6265"
+ "ee3cb090f30d27614cb4574010da90dd862ef9d4ebee4761503190785a71c760",
+ "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ "27e69532f48d89116ff22b8d4e0560609b4b38abfad2b85dcacdb1411f10b275",
+ "0x0000000000000000000000000000000000000000000000000000000000000000"
+ "0000000000000000000000000000000000000000000000000000000000000003",
+ "0x7503cfe87a836ae3a61b8816e25450e6ce5e1c93acf1abc1778064fdcbefa921"
+ "df1626be4fd036e93d75e6a50e3a41e98028fe5fc235f5b889a589cb5215f2a4",
+ 1
+ },
+ {
+ "GOST2012-512-tc26-B", 512, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0x8000000000000000000000000000000000000000000000000000000000000000"
+ "000000000000000000000000000000000000000000000000000000000000006f",
+ "0x8000000000000000000000000000000000000000000000000000000000000000"
+ "000000000000000000000000000000000000000000000000000000000000006c",
+ "0x687d1b459dc841457e3e06cf6f5e2517b97c7d614af138bcbf85dc806c4b289f"
+ "3e965d2db1416d217f8b276fad1ab69c50f78bee1fa3106efb8ccbc7c5140116",
+ "0x8000000000000000000000000000000000000000000000000000000000000001"
+ "49a1ec142565a545acfdb77bd9d40cfa8b996712101bea0ec6346c54374f25bd",
+ "0x0000000000000000000000000000000000000000000000000000000000000000"
+ "0000000000000000000000000000000000000000000000000000000000000002",
+ "0x1a8f7eda389b094c2c071e3647a8940f3c123b697578c213be6dd9e6c8ec7335"
+ "dcb228fd1edf4a39152cbcaaf8c0398828041055f94ceeec7e21340780fe41bd",
+ 1
+ },
+ {
+ "GOST2012-512-tc26-C", 512, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffdc7",
+ "0xdc9203e514a721875485a529d2c722fb187bc8980eb866644de41c68e1430645"
+ "46e861c0e2c9edd92ade71f46fcf50ff2ad97f951fda9f2a2eb6546f39689bd3",
+ "0xb4c4ee28cebc6c2c8ac12952cf37f16ac7efb6a9f69f4b57ffda2e4f0de5ade0"
+ "38cbc2fff719d2c18de0284b8bfef3b52b8cc7a5f5bf0a3c8d2319a5312557e1",
+ "0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ "c98cdba46506ab004c33a9ff5147502cc8eda9e7a769a12694623cef47f023ed",
+ "0xe2e31edfc23de7bdebe241ce593ef5de2295b7a9cbaef021d385f7074cea043a"
+ "a27272a7ae602bf2a7b9033db9ed3610c6fb85487eae97aac5bc7928c1950148",
+ "0xf5ce40d95b5eb899abbccff5911cb8577939804d6527378b8c108c3d2090ff9be"
+ "18e2d33e3021ed2ef32d85822423b6304f726aa854bae07d0396e9a9addc40f",
+ 4
+ },
+
+ {
+ "secp256k1", 256, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F",
+ "0x0000000000000000000000000000000000000000000000000000000000000000",
+ "0x0000000000000000000000000000000000000000000000000000000000000007",
+ "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141",
+ "0x79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798",
+ "0x483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8",
+ 1
+ },
+
+ {
+ "sm2p256v1", 256, 0,
+ MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+ "0xfffffffeffffffffffffffffffffffffffffffff00000000ffffffffffffffff",
+ "0xfffffffeffffffffffffffffffffffffffffffff00000000fffffffffffffffc",
+ "0x28e9fa9e9d9f5e344d5a9e4bcf6509a7f39789f515ab8f92ddbcbd414d940e93",
+ "0xfffffffeffffffffffffffffffffffff7203df6b21c6052b53bbf40939d54123",
+ "0x32c4ae2c1f1981195f9904466a39c9948fe30bbff2660be1715a4589334c74c7",
+ "0xbc3736a2f4f6779c59bdcee36b692153d0a9877cc62a474002df32e52139f0a0",
+ 1
+ },
+
+ { NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }
+ };
+
+
+
+
+/* Return a copy of POINT. */
+static gcry_mpi_point_t
+point_copy (gcry_mpi_point_t point)
+{
+ gcry_mpi_point_t newpoint;
+
+ if (point)
+ {
+ newpoint = mpi_point_new (0);
+ point_set (newpoint, point);
+ }
+ else
+ newpoint = NULL;
+ return newpoint;
+}
+
+
+/* Helper to scan a hex string. */
+static gcry_mpi_t
+scanval (const char *string)
+{
+ gpg_err_code_t rc;
+ gcry_mpi_t val;
+
+ rc = _gcry_mpi_scan (&val, GCRYMPI_FMT_HEX, string, 0, NULL);
+ if (rc)
+ log_fatal ("scanning ECC parameter failed: %s\n", gpg_strerror (rc));
+ return val;
+}
+
+
+/* Return the index of the domain_parms table for a curve with NAME.
+ Return -1 if not found. */
+static int
+find_domain_parms_idx (const char *name)
+{
+ int idx, aliasno;
+
+ /* First check our native curves. */
+ for (idx = 0; domain_parms[idx].desc; idx++)
+ if (!strcmp (name, domain_parms[idx].desc))
+ return idx;
+
+ /* If not found consult the alias table. */
+ if (!domain_parms[idx].desc)
+ {
+ for (aliasno = 0; curve_aliases[aliasno].name; aliasno++)
+ if (!strcmp (name, curve_aliases[aliasno].other))
+ break;
+ if (curve_aliases[aliasno].name)
+ {
+ for (idx = 0; domain_parms[idx].desc; idx++)
+ if (!strcmp (curve_aliases[aliasno].name, domain_parms[idx].desc))
+ return idx;
+ }
+ }
+
+ return -1;
+}
+
+
+/* Generate the crypto system setup. This function takes the NAME of
+ a curve or the desired number of bits and stores at R_CURVE the
+ parameters of the named curve or those of a suitable curve. If
+ R_NBITS is not NULL, the chosen number of bits is stored there.
+ NULL may be given for R_CURVE, if the value is not required and for
+ example only a quick test for availability is desired. Note that
+ the curve fields should be initialized to zero because fields which
+ are not NULL are skipped. */
+gpg_err_code_t
+_gcry_ecc_fill_in_curve (unsigned int nbits, const char *name,
+ elliptic_curve_t *curve, unsigned int *r_nbits)
+{
+ int idx;
+ const char *resname = NULL; /* Set to a found curve name. */
+
+ if (name)
+ idx = find_domain_parms_idx (name);
+ else
+ {
+ for (idx = 0; domain_parms[idx].desc; idx++)
+ if (nbits == domain_parms[idx].nbits
+ && domain_parms[idx].model == MPI_EC_WEIERSTRASS)
+ break;
+ if (!domain_parms[idx].desc)
+ idx = -1;
+ }
+ if (idx < 0)
+ return GPG_ERR_UNKNOWN_CURVE;
+
+ resname = domain_parms[idx].desc;
+
+ /* In fips mode we only support NIST curves. Note that it is
+ possible to bypass this check by specifying the curve parameters
+ directly. */
+ if (fips_mode () && !domain_parms[idx].fips )
+ return GPG_ERR_NOT_SUPPORTED;
+
+ switch (domain_parms[idx].model)
+ {
+ case MPI_EC_WEIERSTRASS:
+ case MPI_EC_EDWARDS:
+ case MPI_EC_MONTGOMERY:
+ break;
+ default:
+ return GPG_ERR_BUG;
+ }
+
+
+ if (r_nbits)
+ *r_nbits = domain_parms[idx].nbits;
+
+ if (curve)
+ {
+ curve->model = domain_parms[idx].model;
+ curve->dialect = domain_parms[idx].dialect;
+ if (!curve->p)
+ curve->p = scanval (domain_parms[idx].p);
+ if (!curve->a)
+ {
+ curve->a = scanval (domain_parms[idx].a);
+ if (curve->a->sign)
+ {
+ mpi_resize (curve->a, curve->p->nlimbs);
+ _gcry_mpih_sub_n (curve->a->d, curve->p->d,
+ curve->a->d, curve->p->nlimbs);
+ curve->a->nlimbs = curve->p->nlimbs;
+ curve->a->sign = 0;
+ }
+ }
+ if (!curve->b)
+ {
+ curve->b = scanval (domain_parms[idx].b);
+ if (curve->b->sign)
+ {
+ mpi_resize (curve->b, curve->p->nlimbs);
+ _gcry_mpih_sub_n (curve->b->d, curve->p->d,
+ curve->b->d, curve->p->nlimbs);
+ curve->b->nlimbs = curve->p->nlimbs;
+ curve->b->sign = 0;
+ }
+ }
+ if (!curve->n)
+ curve->n = scanval (domain_parms[idx].n);
+ if (!curve->G.x)
+ curve->G.x = scanval (domain_parms[idx].g_x);
+ if (!curve->G.y)
+ curve->G.y = scanval (domain_parms[idx].g_y);
+ curve->h = domain_parms[idx].h;
+
+ /*
+ * In the constants of domain_parms, we defined Curve25519
+ * domain parameters as the ones in RFC-7748 before the errata
+ * (eid4730). To keep the computation having exact same values,
+ * we recover the new value of g_y, here.
+ */
+ if (!strcmp (resname, "Curve25519"))
+ mpi_sub (curve->G.y, curve->p, curve->G.y);
+
+ if (!curve->G.z)
+ curve->G.z = mpi_alloc_set_ui (1);
+ if (!curve->name)
+ curve->name = resname;
+ }
+
+ return 0;
+}
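+
+/* Usage sketch (illustrative only, not part of the original code):
+ callers zero-initialize the curve structure so that all fields get
+ filled in, e.g.
+
+ elliptic_curve_t E;
+ unsigned int nbits;
+ memset (&E, 0, sizeof E);
+ if (!_gcry_ecc_fill_in_curve (0, "NIST P-256", &E, &nbits))
+ {
+ ... use E.p, E.a, E.b, E.G, E.n ...
+ _gcry_ecc_curve_free (&E);
+ }
+ */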
+
+
+/* Give the name of the curve NAME, store the curve parameters into P,
+ A, B, G, and N if they point to NULL value. Note that G is
+ returned in standard uncompressed format. Also update MODEL and
+ DIALECT if they are not NULL. */
+gpg_err_code_t
+_gcry_ecc_update_curve_param (const char *name,
+ enum gcry_mpi_ec_models *model,
+ enum ecc_dialects *dialect,
+ gcry_mpi_t *p, gcry_mpi_t *a, gcry_mpi_t *b,
+ gcry_mpi_t *g, gcry_mpi_t *n)
+{
+ int idx;
+
+ idx = find_domain_parms_idx (name);
+ if (idx < 0)
+ return GPG_ERR_UNKNOWN_CURVE;
+
+ if (g)
+ {
+ char *buf;
+ size_t len;
+
+ len = 4;
+ len += strlen (domain_parms[idx].g_x+2);
+ len += strlen (domain_parms[idx].g_y+2);
+ len++;
+ buf = xtrymalloc (len);
+ if (!buf)
+ return gpg_err_code_from_syserror ();
+ strcpy (stpcpy (stpcpy (buf, "0x04"), domain_parms[idx].g_x+2),
+ domain_parms[idx].g_y+2);
+ _gcry_mpi_release (*g);
+ *g = scanval (buf);
+ xfree (buf);
+ }
+ if (model)
+ *model = domain_parms[idx].model;
+ if (dialect)
+ *dialect = domain_parms[idx].dialect;
+ if (p)
+ {
+ _gcry_mpi_release (*p);
+ *p = scanval (domain_parms[idx].p);
+ }
+ if (a)
+ {
+ _gcry_mpi_release (*a);
+ *a = scanval (domain_parms[idx].a);
+ }
+ if (b)
+ {
+ _gcry_mpi_release (*b);
+ *b = scanval (domain_parms[idx].b);
+ }
+ if (n)
+ {
+ _gcry_mpi_release (*n);
+ *n = scanval (domain_parms[idx].n);
+ }
+ return 0;
+}
+
+
+/* Return the name matching the parameters in PKEY. This works only
+ with curves described by the Weierstrass equation. */
+const char *
+_gcry_ecc_get_curve (gcry_sexp_t keyparms, int iterator, unsigned int *r_nbits)
+{
+ gpg_err_code_t rc;
+ const char *result = NULL;
+ elliptic_curve_t E;
+ gcry_mpi_point_t G = NULL;
+ gcry_mpi_t tmp = NULL;
+ int idx;
+
+ memset (&E, 0, sizeof E);
+
+ if (r_nbits)
+ *r_nbits = 0;
+
+ if (!keyparms)
+ {
+ idx = iterator;
+ if (idx >= 0 && idx < DIM (domain_parms))
+ {
+ result = domain_parms[idx].desc;
+ if (r_nbits)
+ *r_nbits = domain_parms[idx].nbits;
+ }
+ return result;
+ }
+
+
+ /*
+ * Extract the curve parameters.
+ */
+ rc = gpg_err_code (sexp_extract_param (keyparms, NULL, "pabn",
+ &E.p, &E.a, &E.b, &E.n, NULL));
+ if (rc == GPG_ERR_NO_OBJ)
+ {
+ /* This might be the second use case of checking whether a
+ specific curve given by name is supported. */
+ gcry_sexp_t l1;
+ char *name;
+
+ l1 = sexp_find_token (keyparms, "curve", 5);
+ if (!l1)
+ goto leave; /* No curve name parameter. */
+
+ name = sexp_nth_string (l1, 1);
+ sexp_release (l1);
+ if (!name)
+ goto leave; /* Name missing or out of core. */
+
+ idx = find_domain_parms_idx (name);
+ xfree (name);
+ if (idx >= 0) /* Curve found. */
+ {
+ result = domain_parms[idx].desc;
+ if (r_nbits)
+ *r_nbits = domain_parms[idx].nbits;
+ }
+ return result;
+ }
+
+ if (rc)
+ goto leave;
+
+ rc = point_from_keyparam (&G, keyparms, "g", NULL);
+ if (rc)
+ goto leave;
+
+ _gcry_mpi_point_init (&E.G);
+ _gcry_mpi_point_set (&E.G, G->x, G->y, G->z);
+
+ for (idx = 0; domain_parms[idx].desc; idx++)
+ {
+ mpi_free (tmp);
+ tmp = scanval (domain_parms[idx].p);
+ if (mpi_cmp (tmp, E.p))
+ continue;
+
+ mpi_free (tmp);
+ tmp = scanval (domain_parms[idx].a);
+ if (tmp->sign)
+ {
+ if (!mpi_cmpabs (tmp, E.a))
+ /* For backward compatibility to <= libgcrypt 1.8, we
+ allow this match to support existing keys in SEXP. */
+ ;
+ else
+ {
+ mpi_resize (tmp, E.p->nlimbs);
+ _gcry_mpih_sub_n (tmp->d, E.p->d,
+ tmp->d, E.p->nlimbs);
+ tmp->nlimbs = E.p->nlimbs;
+ tmp->sign = 0;
+ if (mpi_cmp (tmp, E.a))
+ continue;
+ }
+ }
+ else if (mpi_cmp (tmp, E.a))
+ continue;
+
+ mpi_free (tmp);
+ tmp = scanval (domain_parms[idx].b);
+ if (tmp->sign)
+ {
+ if (!mpi_cmpabs (tmp, E.b))
+ /* Same for backward compatibility, see above. */
+ ;
+ else
+ {
+ mpi_resize (tmp, E.p->nlimbs);
+ _gcry_mpih_sub_n (tmp->d, E.p->d,
+ tmp->d, E.p->nlimbs);
+ tmp->nlimbs = E.p->nlimbs;
+ tmp->sign = 0;
+ if (mpi_cmp (tmp, E.b))
+ continue;
+ }
+ }
+ else if (mpi_cmp (tmp, E.b))
+ continue;
+
+ mpi_free (tmp);
+ tmp = scanval (domain_parms[idx].n);
+ if (mpi_cmp (tmp, E.n))
+ continue;
+
+ mpi_free (tmp);
+ tmp = scanval (domain_parms[idx].g_x);
+ if (mpi_cmp (tmp, E.G.x))
+ continue;
+
+ mpi_free (tmp);
+ tmp = scanval (domain_parms[idx].g_y);
+ if (mpi_cmp (tmp, E.G.y))
+ continue;
+
+ result = domain_parms[idx].desc;
+ if (r_nbits)
+ *r_nbits = domain_parms[idx].nbits;
+ break;
+ }
+
+ leave:
+ _gcry_mpi_point_release (G);
+ _gcry_mpi_release (tmp);
+ _gcry_mpi_release (E.p);
+ _gcry_mpi_release (E.a);
+ _gcry_mpi_release (E.b);
+ _gcry_mpi_point_free_parts (&E.G);
+ _gcry_mpi_release (E.n);
+ return result;
+}
+
+
+/* Helper to extract an MPI from key parameters. */
+static gpg_err_code_t
+mpi_from_keyparam (gcry_mpi_t *r_a, gcry_sexp_t keyparam, const char *name,
+ int opaque)
+{
+ gcry_err_code_t ec = 0;
+ gcry_sexp_t l1;
+
+ l1 = sexp_find_token (keyparam, name, 0);
+ if (l1)
+ {
+ *r_a = sexp_nth_mpi (l1, 1, opaque? GCRYMPI_FMT_OPAQUE : GCRYMPI_FMT_USG);
+ sexp_release (l1);
+ if (!*r_a)
+ ec = GPG_ERR_INV_OBJ;
+ }
+ return ec;
+}
+
+/* Helper to extract a point from key parameters. If no parameter
+ with NAME is found, the function tries to find a non-encoded point
+ by appending ".x", ".y" and ".z" to NAME. ".z" is in this case
+ optional and defaults to 1. EC is the context which at this point
+ may not be fully initialized. */
+static gpg_err_code_t
+point_from_keyparam (gcry_mpi_point_t *r_a,
+ gcry_sexp_t keyparam, const char *name, mpi_ec_t ec)
+{
+ gcry_err_code_t rc;
+ gcry_sexp_t l1;
+ gcry_mpi_point_t point;
+
+ l1 = sexp_find_token (keyparam, name, 0);
+ if (l1)
+ {
+ gcry_mpi_t a;
+
+ a = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_OPAQUE);
+ sexp_release (l1);
+ if (!a)
+ return GPG_ERR_INV_OBJ;
+
+ point = mpi_point_new (0);
+ rc = _gcry_mpi_ec_decode_point (point, a, ec);
+ mpi_free (a);
+ if (rc)
+ {
+ mpi_point_release (point);
+ return rc;
+ }
+ }
+ else
+ {
+ char *tmpname;
+ gcry_mpi_t x = NULL;
+ gcry_mpi_t y = NULL;
+ gcry_mpi_t z = NULL;
+
+ tmpname = xtrymalloc (strlen (name) + 2 + 1);
+ if (!tmpname)
+ return gpg_err_code_from_syserror ();
+ strcpy (stpcpy (tmpname, name), ".x");
+ rc = mpi_from_keyparam (&x, keyparam, tmpname, 0);
+ if (rc)
+ {
+ xfree (tmpname);
+ return rc;
+ }
+ strcpy (stpcpy (tmpname, name), ".y");
+ rc = mpi_from_keyparam (&y, keyparam, tmpname, 0);
+ if (rc)
+ {
+ mpi_free (x);
+ xfree (tmpname);
+ return rc;
+ }
+ strcpy (stpcpy (tmpname, name), ".z");
+ rc = mpi_from_keyparam (&z, keyparam, tmpname, 0);
+ if (rc)
+ {
+ mpi_free (y);
+ mpi_free (x);
+ xfree (tmpname);
+ return rc;
+ }
+ if (!z)
+ z = mpi_set_ui (NULL, 1);
+ if (x && y)
+ point = mpi_point_snatch_set (NULL, x, y, z);
+ else
+ {
+ mpi_free (x);
+ mpi_free (y);
+ mpi_free (z);
+ point = NULL;
+ }
+ xfree (tmpname);
+ }
+
+ if (point)
+ *r_a = point;
+ return 0;
+}
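The fallback path above searches for tokens literally named NAME.x, NAME.y and NAME.z. A hedged sketch of key parameters in that split-component shape (all MPI variables are placeholders assumed to be initialized by the caller; whether a particular front end accepts this form depends on how it feeds KEYPARAM into the code above):

    #include <gcrypt.h>

    /* Sketch: supply the base point as separate affine components
       instead of an encoded point.  Every MPI argument is assumed to
       be set up elsewhere. */
    static gcry_error_t
    build_split_point_params (gcry_sexp_t *r_keyparam,
                              gcry_mpi_t p, gcry_mpi_t a, gcry_mpi_t b,
                              gcry_mpi_t gx, gcry_mpi_t gy, gcry_mpi_t n)
    {
      return gcry_sexp_build (r_keyparam, NULL,
                              "(ecc (p %m)(a %m)(b %m)"
                              "(g.x %m)(g.y %m)(n %m))",
                              p, a, b, gx, gy, n);
    }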
+
+
+
+static gpg_err_code_t
+mpi_ec_get_elliptic_curve (elliptic_curve_t *E, int *r_flags,
+ gcry_sexp_t keyparam, const char *curvename)
+{
+ gpg_err_code_t errc;
+ unsigned int nbits;
+ gcry_sexp_t l1;
+
+ errc = _gcry_pk_util_get_nbits (keyparam, &nbits);
+ if (errc)
+ return errc;
+
+ E->model = MPI_EC_WEIERSTRASS;
+ E->dialect = ECC_DIALECT_STANDARD;
+ E->h = 1;
+
+ if (keyparam)
+ {
+ /* Parse an optional flags list. */
+ l1 = sexp_find_token (keyparam, "flags", 0);
+ if (l1)
+ {
+ int flags = 0;
+
+ errc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL);
+ sexp_release (l1);
+ l1 = NULL;
+ if (errc)
+ goto leave;
+
+ *r_flags |= flags;
+ }
+
+ /* Parse the deprecated optional transient-key flag. */
+ l1 = sexp_find_token (keyparam, "transient-key", 0);
+ if (l1)
+ {
+ *r_flags |= PUBKEY_FLAG_TRANSIENT_KEY;
+ sexp_release (l1);
+ }
+
+ /* Check whether a curve name was given. */
+ l1 = sexp_find_token (keyparam, "curve", 5);
+
+ /* If we don't have a curve name or if override parameters have
+ explicitly been requested, parse them. */
+ if (!l1 || (*r_flags & PUBKEY_FLAG_PARAM))
+ {
+ gcry_mpi_point_t G = NULL;
+ gcry_mpi_t cofactor = NULL;
+
+ errc = mpi_from_keyparam (&E->p, keyparam, "p", 0);
+ if (errc)
+ goto leave;
+ errc = mpi_from_keyparam (&E->a, keyparam, "a", 0);
+ if (errc)
+ goto leave;
+ errc = mpi_from_keyparam (&E->b, keyparam, "b", 0);
+ if (errc)
+ goto leave;
+ errc = point_from_keyparam (&G, keyparam, "g", NULL);
+ if (errc)
+ goto leave;
+ if (G)
+ {
+ _gcry_mpi_point_init (&E->G);
+ mpi_point_set (&E->G, G->x, G->y, G->z);
+ mpi_point_set (G, NULL, NULL, NULL);
+ mpi_point_release (G);
+ }
+ errc = mpi_from_keyparam (&E->n, keyparam, "n", 0);
+ if (errc)
+ goto leave;
+ errc = mpi_from_keyparam (&cofactor, keyparam, "h", 0);
+ if (errc)
+ goto leave;
+ if (cofactor)
+ {
+ mpi_get_ui (&E->h, cofactor);
+ mpi_free (cofactor);
+ }
+ }
+ }
+ else
+ l1 = NULL; /* No curvename. */
+
+ /* Check whether a curve name parameter is available and use that to
+ fill in missing values.  If no such parameter is available, try an
+ optionally provided CURVENAME.  If only the curve name has been
+ given, use that one. */
+ if (l1 || curvename || nbits)
+ {
+ char *name;
+
+ if (l1)
+ {
+ name = sexp_nth_string (l1, 1);
+ sexp_release (l1);
+ if (!name)
+ {
+ errc = GPG_ERR_INV_OBJ; /* Name missing or out of core. */
+ goto leave;
+ }
+ }
+ else
+ name = NULL;
+
+ errc = _gcry_ecc_fill_in_curve (nbits, name? name : curvename, E, NULL);
+ xfree (name);
+ if (errc)
+ goto leave;
+ }
+
+ leave:
+ return errc;
+}
+
+static gpg_err_code_t
+mpi_ec_setup_elliptic_curve (mpi_ec_t ec, int flags,
+ elliptic_curve_t *E, gcry_sexp_t keyparam)
+{
+ gpg_err_code_t errc = 0;
+
+ ec->G = mpi_point_snatch_set (NULL, E->G.x, E->G.y, E->G.z);
+ E->G.x = NULL;
+ E->G.y = NULL;
+ E->G.z = NULL;
+ ec->n = E->n;
+ E->n = NULL;
+ ec->h = E->h;
+ ec->name = E->name;
+
+ /* Now that we know the curve name we can look for the public key
+ Q. point_from_keyparam needs to know the curve parameters so
+ that it is able to use the correct decompression. Parsing
+ the private key D could have been done earlier but it is less
+ surprising if we do it here as well. */
+ if (keyparam)
+ {
+ int is_opaque_bytes = ((ec->dialect == ECC_DIALECT_ED25519
+ && (flags & PUBKEY_FLAG_EDDSA))
+ || (ec->dialect == ECC_DIALECT_SAFECURVE));
+
+ errc = point_from_keyparam (&ec->Q, keyparam, "q", ec);
+ if (errc)
+ return errc;
+ errc = mpi_from_keyparam (&ec->d, keyparam, "d", is_opaque_bytes);
+
+ /* Size of opaque bytes should match size of P. */
+ if (!errc && ec->d && is_opaque_bytes)
+ {
+ unsigned int n = mpi_get_nbits (ec->d);
+ unsigned int len;
+
+ len = (ec->nbits+7)/8;
+ /* EdDSA requires additional bit for sign. */
+ if ((ec->nbits%8) == 0 && ec->model == MPI_EC_EDWARDS)
+ len++;
+
+ if ((n+7)/8 != len)
+ {
+ if (ec->dialect == ECC_DIALECT_ED25519)
+ {
+ /*
+ * GnuPG (<= 2.2) or OpenPGP implementations with no
+ * SOS support may remove zeros at the beginning.
+ * Recover those zeros.
+ */
+ /*
+ * Also, GnuPG (<= 2.2) may add additional zero at
+ * the beginning, when private key is moved from
+ * OpenPGP to gpg-agent. Remove such a zero-prefix.
+ */
+ const unsigned char *buf;
+ unsigned char *value;
+
+ buf = mpi_get_opaque (ec->d, &n);
+ if (!buf)
+ return GPG_ERR_INV_OBJ;
+
+ value = xtrymalloc_secure (len);
+ if (!value)
+ return gpg_err_code_from_syserror ();
+
+ if ((n+7)/8 < len)
+ /* Recover zeros. */
+ {
+ memset (value, 0, len - (n+7)/8);
+ memcpy (value + len - (n+7)/8, buf, (n+7)/8);
+ }
+ else if ((n+7)/8 == len + 1)
+ /* Remove a zero. */
+ memcpy (value, buf+1, len);
+ else
+ {
+ xfree (value);
+ return GPG_ERR_INV_OBJ;
+ }
+
+ mpi_set_opaque (ec->d, value, len*8);
+ }
+ else
+ {
+ if (DBG_CIPHER)
+ log_debug ("scalar size (%d) != prime size (%d)",
+ (n+7)/8, len);
+
+ errc = GPG_ERR_INV_OBJ;
+ }
+ }
+ }
+ }
+
+ return errc;
+}
+
+gpg_err_code_t
+_gcry_mpi_ec_internal_new (mpi_ec_t *r_ec, int *r_flags, const char *name_op,
+ gcry_sexp_t keyparam, const char *curvename)
+{
+ gpg_err_code_t errc;
+ elliptic_curve_t E;
+ mpi_ec_t ec;
+
+ *r_ec = NULL;
+
+ memset (&E, 0, sizeof E);
+ errc = mpi_ec_get_elliptic_curve (&E, r_flags, keyparam, curvename);
+ if (errc)
+ goto leave;
+
+ ec = _gcry_mpi_ec_p_internal_new (E.model, E.dialect, *r_flags,
+ E.p, E.a, E.b);
+ if (!ec)
+ goto leave;
+
+ errc = mpi_ec_setup_elliptic_curve (ec, *r_flags, &E, keyparam);
+ if (errc)
+ {
+ _gcry_mpi_ec_free (ec);
+ goto leave;
+ }
+ else
+ *r_ec = ec;
+
+ if (!errc && DBG_CIPHER)
+ {
+ gcry_mpi_t mpi_q = NULL;
+ gcry_sexp_t l1;
+ char msg[80];
+
+ l1 = sexp_find_token (keyparam, "q", 0);
+ if (l1)
+ {
+ mpi_q = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_OPAQUE);
+ sexp_release (l1);
+ }
+
+ log_debug ("%s info: %s/%s%s\n", name_op,
+ _gcry_ecc_model2str (ec->model),
+ _gcry_ecc_dialect2str (ec->dialect),
+ (*r_flags & PUBKEY_FLAG_EDDSA)? "+EdDSA" : "");
+ if (ec->name)
+ log_debug ("%s name: %s\n", name_op, ec->name);
+ snprintf (msg, sizeof msg, "%s p", name_op);
+ log_printmpi (msg, ec->p);
+ snprintf (msg, sizeof msg, "%s a", name_op);
+ log_printmpi (msg, ec->a);
+ snprintf (msg, sizeof msg, "%s b", name_op);
+ log_printmpi (msg, ec->b);
+ snprintf (msg, sizeof msg, "%s g", name_op);
+ log_printpnt (msg, ec->G, NULL);
+ snprintf (msg, sizeof msg, "%s n", name_op);
+ log_printmpi (msg, ec->n);
+ log_debug ("%s h:+%02x\n", name_op, ec->h);
+ if (mpi_q)
+ {
+ snprintf (msg, sizeof msg, "%s q", name_op);
+ log_printmpi (msg, mpi_q);
+ mpi_free (mpi_q);
+ }
+ if (!fips_mode () && ec->d)
+ {
+ snprintf (msg, sizeof msg, "%s d", name_op);
+ log_printmpi (msg, ec->d);
+ }
+ }
+
+ leave:
+ _gcry_ecc_curve_free (&E);
+ return errc;
+}
+
+/* This function creates a new context for elliptic curve operations.
+ Either KEYPARAM or CURVENAME must be given. If both are given and
+ KEYPARAM has no curve parameter, CURVENAME is used to add missing
+ parameters. On success 0 is returned and the new context stored at
+ R_CTX. On error NULL is stored at R_CTX and an error code is
+ returned. The context needs to be released using
+ gcry_ctx_release. */
+gpg_err_code_t
+_gcry_mpi_ec_new (gcry_ctx_t *r_ctx,
+ gcry_sexp_t keyparam, const char *curvename)
+{
+ gpg_err_code_t errc;
+ elliptic_curve_t E;
+ gcry_ctx_t ctx = NULL;
+ int flags = 0;
+ mpi_ec_t ec;
+
+ *r_ctx = NULL;
+
+ memset (&E, 0, sizeof E);
+ errc = mpi_ec_get_elliptic_curve (&E, &flags, keyparam, curvename);
+ if (errc)
+ goto leave;
+
+ errc = _gcry_mpi_ec_p_new (&ctx, E.model, E.dialect, flags, E.p, E.a, E.b);
+ if (errc)
+ goto leave;
+
+ ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC);
+ errc = mpi_ec_setup_elliptic_curve (ec, flags, &E, keyparam);
+ if (errc)
+ goto leave;
+
+ *r_ctx = ctx;
+ ctx = NULL;
+
+ leave:
+ _gcry_ecc_curve_free (&E);
+ _gcry_ctx_release (ctx);
+ return errc;
+}
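The public counterpart of this constructor is gcry_mpi_ec_new. A minimal sketch creating a context from a curve name only and releasing it again:

    #include <gcrypt.h>

    /* Sketch: set up and tear down an EC context for NIST P-256. */
    static gcry_error_t
    with_p256_context (void)
    {
      gcry_ctx_t ctx;
      gcry_error_t err = gcry_mpi_ec_new (&ctx, NULL, "NIST P-256");

      if (err)
        return err;              /* On error NULL is stored at CTX. */
      /* ... use the context ... */
      gcry_ctx_release (ctx);
      return 0;
    }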
+
+
+/* Return the parameters of the curve NAME as an S-expression. */
+gcry_sexp_t
+_gcry_ecc_get_param_sexp (const char *name)
+{
+ unsigned int nbits;
+ elliptic_curve_t E;
+ mpi_ec_t ctx;
+ gcry_mpi_t g_x, g_y;
+ gcry_mpi_t pkey[5];
+ gcry_sexp_t result;
+ int i;
+
+ memset (&E, 0, sizeof E);
+ if (_gcry_ecc_fill_in_curve (0, name, &E, &nbits))
+ return NULL;
+
+ g_x = mpi_new (0);
+ g_y = mpi_new (0);
+ ctx = _gcry_mpi_ec_p_internal_new (E.model,
+ E.dialect,
+ 0,
+ E.p, E.a, E.b);
+ if (_gcry_mpi_ec_get_affine (g_x, g_y, &E.G, ctx))
+ log_fatal ("ecc get param: Failed to get affine coordinates\n");
+ _gcry_mpi_ec_free (ctx);
+ _gcry_mpi_point_free_parts (&E.G);
+
+ pkey[0] = E.p;
+ pkey[1] = E.a;
+ pkey[2] = E.b;
+ pkey[3] = _gcry_ecc_ec2os (g_x, g_y, E.p);
+ pkey[4] = E.n;
+
+ mpi_free (g_x);
+ mpi_free (g_y);
+
+ if (sexp_build (&result, NULL,
+ "(public-key(ecc(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)))",
+ pkey[0], pkey[1], pkey[2], pkey[3], pkey[4], E.h))
+ result = NULL;
+
+ for (i=0; i < DIM (pkey); i++)
+ _gcry_mpi_release (pkey[i]);
+
+ return result;
+}
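Applications get at the same parameter S-expression through gcry_pk_get_param. A short sketch printing the domain parameters of a named curve in advanced format:

    #include <stdio.h>
    #include <stdlib.h>
    #include <gcrypt.h>

    /* Sketch: dump the domain parameters of a named curve. */
    static void
    dump_curve_params (const char *name)
    {
      gcry_sexp_t params = gcry_pk_get_param (GCRY_PK_ECC, name);
      size_t len;
      char *buf;

      if (!params)
        return;
      len = gcry_sexp_sprint (params, GCRYSEXP_FMT_ADVANCED, NULL, 0);
      buf = malloc (len);
      if (buf)
        {
          gcry_sexp_sprint (params, GCRYSEXP_FMT_ADVANCED, buf, len);
          fputs (buf, stdout);
          free (buf);
        }
      gcry_sexp_release (params);
    }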
+
+
+/* Return an MPI (or opaque MPI) described by NAME and the context EC.
+ If COPY is true a copy is returned, if not a const MPI may be
+ returned. In any case mpi_free must be used. */
+gcry_mpi_t
+_gcry_ecc_get_mpi (const char *name, mpi_ec_t ec, int copy)
+{
+ if (!*name)
+ return NULL;
+
+ if (!strcmp (name, "p") && ec->p)
+ return mpi_is_const (ec->p) && !copy? ec->p : mpi_copy (ec->p);
+ if (!strcmp (name, "a") && ec->a)
+ return mpi_is_const (ec->a) && !copy? ec->a : mpi_copy (ec->a);
+ if (!strcmp (name, "b") && ec->b)
+ return mpi_is_const (ec->b) && !copy? ec->b : mpi_copy (ec->b);
+ if (!strcmp (name, "n") && ec->n)
+ return mpi_is_const (ec->n) && !copy? ec->n : mpi_copy (ec->n);
+ if (!strcmp (name, "h"))
+ {
+ gcry_mpi_t h = _gcry_mpi_get_const (ec->h);
+
+ return !copy? h : mpi_set (NULL, h);
+ }
+ if (!strcmp (name, "d") && ec->d)
+ return mpi_is_const (ec->d) && !copy? ec->d : mpi_copy (ec->d);
+
+ /* Return a requested point coordinate. */
+ if (!strcmp (name, "g.x") && ec->G && ec->G->x)
+ return mpi_is_const (ec->G->x) && !copy? ec->G->x : mpi_copy (ec->G->x);
+ if (!strcmp (name, "g.y") && ec->G && ec->G->y)
+ return mpi_is_const (ec->G->y) && !copy? ec->G->y : mpi_copy (ec->G->y);
+ if (!strcmp (name, "q.x") && ec->Q && ec->Q->x)
+ return mpi_is_const (ec->Q->x) && !copy? ec->Q->x : mpi_copy (ec->Q->x);
+ if (!strcmp (name, "q.y") && ec->Q && ec->Q->y)
+ return mpi_is_const (ec->Q->y) && !copy? ec->Q->y : mpi_copy (ec->Q->y);
+
+ /* If the base point has been requested, return it in standard
+ encoding. */
+ if (!strcmp (name, "g") && ec->G)
+ return _gcry_mpi_ec_ec2os (ec->G, ec);
+
+ /* If the public key has been requested, return it by default in
+ standard uncompressed encoding or if requested in other
+ encodings. */
+ if (*name == 'q' && (!name[1] || name[1] == '@'))
+ {
+ /* If only the private key is given, compute the public key. */
+ if (!ec->Q)
+ ec->Q = _gcry_ecc_compute_public (NULL, ec);
+
+ if (!ec->Q)
+ return NULL;
+
+ if (name[1] != '@')
+ return _gcry_mpi_ec_ec2os (ec->Q, ec);
+
+ if (!strcmp (name+2, "eddsa") && ec->model == MPI_EC_EDWARDS)
+ {
+ unsigned char *encpk;
+ unsigned int encpklen;
+
+ if (!_gcry_ecc_eddsa_encodepoint (ec->Q, ec, NULL, NULL, 0,
+ &encpk, &encpklen))
+ return mpi_set_opaque (NULL, encpk, encpklen*8);
+ }
+ }
+
+ return NULL;
+}
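From application code these lookups go through gcry_mpi_ec_get_mpi on a gcry_ctx_t. A small sketch fetching the public point, which per the branch above is computed from the secret scalar on the fly when it is not yet present:

    #include <gcrypt.h>

    /* Sketch: obtain the encoded public point "q" as a fresh MPI copy. */
    static gcry_mpi_t
    get_public_point (gcry_ctx_t ctx)
    {
      return gcry_mpi_ec_get_mpi ("q", ctx, 1);   /* 1 = always copy */
    }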
+
+
+/* Return a point described by NAME and the context EC. */
+gcry_mpi_point_t
+_gcry_ecc_get_point (const char *name, mpi_ec_t ec)
+{
+ if (!strcmp (name, "g") && ec->G)
+ return point_copy (ec->G);
+ if (!strcmp (name, "q"))
+ {
+ /* If only the private key is given, compute the public key. */
+ if (!ec->Q)
+ ec->Q = _gcry_ecc_compute_public (NULL, ec);
+
+ if (ec->Q)
+ return point_copy (ec->Q);
+ }
+
+ return NULL;
+}
+
+
+/* Store the MPI NEWVALUE into the context EC under NAME. */
+gpg_err_code_t
+_gcry_ecc_set_mpi (const char *name, gcry_mpi_t newvalue, mpi_ec_t ec)
+{
+ gpg_err_code_t rc = 0;
+
+ if (!*name)
+ ;
+ else if (!strcmp (name, "p"))
+ {
+ mpi_free (ec->p);
+ ec->p = mpi_copy (newvalue);
+ _gcry_mpi_ec_get_reset (ec);
+ }
+ else if (!strcmp (name, "a"))
+ {
+ mpi_free (ec->a);
+ ec->a = mpi_copy (newvalue);
+ _gcry_mpi_ec_get_reset (ec);
+ }
+ else if (!strcmp (name, "b"))
+ {
+ mpi_free (ec->b);
+ ec->b = mpi_copy (newvalue);
+ }
+ else if (!strcmp (name, "n"))
+ {
+ mpi_free (ec->n);
+ ec->n = mpi_copy (newvalue);
+ }
+ else if (!strcmp (name, "h"))
+ {
+ mpi_get_ui (&ec->h, newvalue);
+ }
+ else if (*name == 'q' && (!name[1] || name[1] == '@'))
+ {
+ if (newvalue)
+ {
+ if (!ec->Q)
+ ec->Q = mpi_point_new (0);
+ rc = _gcry_mpi_ec_decode_point (ec->Q, newvalue, ec);
+ }
+ if (rc || !newvalue)
+ {
+ _gcry_mpi_point_release (ec->Q);
+ ec->Q = NULL;
+ }
+ /* Note: We assume that Q matches d and thus do not reset d. */
+ }
+ else if (!strcmp (name, "d"))
+ {
+ mpi_free (ec->d);
+ ec->d = mpi_copy (newvalue);
+ if (ec->d)
+ {
+ /* We need to reset the public key because it may not
+ anymore match. */
+ _gcry_mpi_point_release (ec->Q);
+ ec->Q = NULL;
+ }
+ }
+ else
+ rc = GPG_ERR_UNKNOWN_NAME;
+
+ return rc;
+}
+
+
+/* Store the point NEWVALUE into the context EC under NAME. */
+gpg_err_code_t
+_gcry_ecc_set_point (const char *name, gcry_mpi_point_t newvalue, mpi_ec_t ec)
+{
+ if (!strcmp (name, "g"))
+ {
+ _gcry_mpi_point_release (ec->G);
+ ec->G = point_copy (newvalue);
+ }
+ else if (!strcmp (name, "q"))
+ {
+ _gcry_mpi_point_release (ec->Q);
+ ec->Q = point_copy (newvalue);
+ }
+ else
+ return GPG_ERR_UNKNOWN_NAME;
+
+ return 0;
+}
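The matching public setters are gcry_mpi_ec_set_mpi and gcry_mpi_ec_set_point. A minimal sketch installing a new secret scalar; as the code above shows, this drops any cached public point so that it is recomputed on demand:

    #include <gcrypt.h>

    /* Sketch: replace the secret scalar "d" in an existing EC context. */
    static gcry_error_t
    set_secret_scalar (gcry_ctx_t ctx, gcry_mpi_t d)
    {
      return gcry_mpi_ec_set_mpi ("d", d, ctx);
    }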
diff --git a/comm/third_party/libgcrypt/cipher/ecc-ecdh.c b/comm/third_party/libgcrypt/cipher/ecc-ecdh.c
new file mode 100644
index 0000000000..d6b8991af6
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/ecc-ecdh.c
@@ -0,0 +1,127 @@
+/* ecc-ecdh.c - Elliptic Curve Diffie-Hellman key agreement
+ * Copyright (C) 2019 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1+
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "ecc-common.h"
+
+#define ECC_CURVE25519_BYTES 32
+#define ECC_CURVE448_BYTES 56
+
+static gpg_err_code_t
+prepare_ec (mpi_ec_t *r_ec, const char *name)
+{
+ int flags = 0;
+
+ if (!strcmp (name, "Curve25519"))
+ flags = PUBKEY_FLAG_DJB_TWEAK;
+
+ return _gcry_mpi_ec_internal_new (r_ec, &flags, "ecc_mul_point", NULL, name);
+}
+
+unsigned int
+_gcry_ecc_get_algo_keylen (int curveid)
+{
+ unsigned int len = 0;
+
+ if (curveid == GCRY_ECC_CURVE25519)
+ len = ECC_CURVE25519_BYTES;
+ else if (curveid == GCRY_ECC_CURVE448)
+ len = ECC_CURVE448_BYTES;
+
+ return len;
+}
+
+gpg_error_t
+_gcry_ecc_mul_point (int curveid, unsigned char *result,
+ const unsigned char *scalar, const unsigned char *point)
+{
+ unsigned int nbits;
+ unsigned int nbytes;
+ const char *curve;
+ gpg_err_code_t err;
+ gcry_mpi_t mpi_k;
+ mpi_ec_t ec;
+ mpi_point_struct Q;
+ gcry_mpi_t x;
+ unsigned int len;
+ unsigned char *buf;
+
+ if (curveid == GCRY_ECC_CURVE25519)
+ curve = "Curve25519";
+ else if (curveid == GCRY_ECC_CURVE448)
+ curve = "X448";
+ else
+ return gpg_error (GPG_ERR_UNKNOWN_CURVE);
+
+ err = prepare_ec (&ec, curve);
+ if (err)
+ return err;
+
+ nbits = ec->nbits;
+ nbytes = (nbits + 7)/8;
+
+ mpi_k = _gcry_mpi_set_opaque_copy (NULL, scalar, nbytes*8);
+ x = mpi_new (nbits);
+ point_init (&Q);
+
+ if (point)
+ {
+ gcry_mpi_t mpi_u = _gcry_mpi_set_opaque_copy (NULL, point, nbytes*8);
+ mpi_point_struct P;
+
+ point_init (&P);
+ err = _gcry_ecc_mont_decodepoint (mpi_u, ec, &P);
+ _gcry_mpi_release (mpi_u);
+ if (err)
+ goto leave;
+ _gcry_mpi_ec_mul_point (&Q, mpi_k, &P, ec);
+ point_free (&P);
+ }
+ else
+ _gcry_mpi_ec_mul_point (&Q, mpi_k, ec->G, ec);
+
+ _gcry_mpi_ec_get_affine (x, NULL, &Q, ec);
+
+ buf = _gcry_mpi_get_buffer (x, nbytes, &len, NULL);
+ if (!buf)
+ err = gpg_error_from_syserror ();
+ else
+ {
+ memcpy (result, buf, nbytes);
+ xfree (buf);
+ }
+
+ leave:
+ _gcry_mpi_release (x);
+ point_free (&Q);
+ _gcry_mpi_release (mpi_k);
+ _gcry_mpi_ec_free (ec);
+ return err;
+}
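Assuming this Libgcrypt version also exports the public wrappers gcry_ecc_get_algo_keylen and gcry_ecc_mul_point introduced alongside this file, an X25519 shared-secret computation looks roughly like this (all buffers are 32 bytes):

    #include <gcrypt.h>

    /* Sketch: X25519 key agreement.  Passing NULL as PEER_POINT would
       multiply the scalar with the curve's base point instead. */
    static gcry_error_t
    x25519_shared (unsigned char *result,
                   const unsigned char *scalar,
                   const unsigned char *peer_point)
    {
      if (gcry_ecc_get_algo_keylen (GCRY_ECC_CURVE25519) != 32)
        return gpg_error (GPG_ERR_INV_LENGTH);
      return gcry_ecc_mul_point (GCRY_ECC_CURVE25519, result,
                                 scalar, peer_point);
    }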
diff --git a/comm/third_party/libgcrypt/cipher/ecc-ecdsa.c b/comm/third_party/libgcrypt/cipher/ecc-ecdsa.c
new file mode 100644
index 0000000000..30103f1417
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/ecc-ecdsa.c
@@ -0,0 +1,248 @@
+/* ecc-ecdsa.c - Elliptic Curve ECDSA signatures
+ * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "pubkey-internal.h"
+#include "ecc-common.h"
+
+
+/* Compute an ECDSA signature.
+ * Return the signature struct (r,s) from the message hash. The caller
+ * must have allocated R and S.
+ */
+gpg_err_code_t
+_gcry_ecc_ecdsa_sign (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r, gcry_mpi_t s,
+ int flags, int hashalgo)
+{
+ gpg_err_code_t rc = 0;
+ int extraloops = 0;
+ gcry_mpi_t k, dr, sum, k_1, x;
+ mpi_point_struct I;
+ gcry_mpi_t hash;
+ const void *abuf;
+ unsigned int abits, qbits;
+ gcry_mpi_t b; /* Random number needed for blinding. */
+ gcry_mpi_t bi; /* multiplicative inverse of B. */
+
+ if (DBG_CIPHER)
+ log_mpidump ("ecdsa sign hash ", input );
+
+ qbits = mpi_get_nbits (ec->n);
+
+ /* Convert the INPUT into an MPI if needed. */
+ rc = _gcry_dsa_normalize_hash (input, &hash, qbits);
+ if (rc)
+ return rc;
+
+ b = mpi_snew (qbits);
+ bi = mpi_snew (qbits);
+ do
+ {
+ _gcry_mpi_randomize (b, qbits, GCRY_WEAK_RANDOM);
+ mpi_mod (b, b, ec->n);
+ }
+ while (!mpi_invm (bi, b, ec->n));
+
+ k = NULL;
+ dr = mpi_alloc (0);
+ sum = mpi_alloc (0);
+ k_1 = mpi_alloc (0);
+ x = mpi_alloc (0);
+ point_init (&I);
+
+ /* Two loops to avoid R or S being zero.  This is more of a joke than
+ a real demand because the probability of them being zero is less
+ than any hardware failure. Some specs however require it. */
+ do
+ {
+ do
+ {
+ mpi_free (k);
+ k = NULL;
+ if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo)
+ {
+ /* Use Pornin's method for deterministic DSA. If this
+ flag is set, it is expected that HASH is an opaque
+ MPI with the to be signed hash. That hash is also
+ used as h1 from 3.2.a. */
+ if (!mpi_is_opaque (input))
+ {
+ rc = GPG_ERR_CONFLICT;
+ goto leave;
+ }
+
+ abuf = mpi_get_opaque (input, &abits);
+ rc = _gcry_dsa_gen_rfc6979_k (&k, ec->n, ec->d,
+ abuf, (abits+7)/8,
+ hashalgo, extraloops);
+ if (rc)
+ goto leave;
+ extraloops++;
+ }
+ else
+ k = _gcry_dsa_gen_k (ec->n, GCRY_STRONG_RANDOM);
+
+ mpi_invm (k_1, k, ec->n); /* k_1 = k^(-1) mod n */
+
+ _gcry_dsa_modify_k (k, ec->n, qbits);
+
+ _gcry_mpi_ec_mul_point (&I, k, ec->G, ec);
+ if (_gcry_mpi_ec_get_affine (x, NULL, &I, ec))
+ {
+ if (DBG_CIPHER)
+ log_debug ("ecc sign: Failed to get affine coordinates\n");
+ rc = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+ mpi_mod (r, x, ec->n); /* r = x mod n */
+ }
+ while (!mpi_cmp_ui (r, 0));
+
+ /* Computation of dr, sum, and s are blinded with b. */
+ mpi_mulm (dr, b, ec->d, ec->n);
+ mpi_mulm (dr, dr, r, ec->n); /* dr = d*r mod n */
+ mpi_mulm (sum, b, hash, ec->n);
+ mpi_addm (sum, sum, dr, ec->n); /* sum = hash + (d*r) mod n */
+ mpi_mulm (s, k_1, sum, ec->n); /* s = k^(-1)*(hash+(d*r)) mod n */
+ /* Undo blinding by b^-1 */
+ mpi_mulm (s, bi, s, ec->n);
+ }
+ while (!mpi_cmp_ui (s, 0));
+
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("ecdsa sign result r ", r);
+ log_mpidump ("ecdsa sign result s ", s);
+ }
+
+ leave:
+ mpi_free (b);
+ mpi_free (bi);
+ point_free (&I);
+ mpi_free (x);
+ mpi_free (k_1);
+ mpi_free (sum);
+ mpi_free (dr);
+ mpi_free (k);
+
+ if (hash != input)
+ mpi_free (hash);
+
+ return rc;
+}
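At the public API level this signing routine is reached via gcry_pk_sign. A hedged sketch of deterministic (RFC 6979) signing of a SHA-256 digest, assuming SKEY is a private ECC key S-expression:

    #include <gcrypt.h>

    /* Sketch: deterministic ECDSA over a 32-byte SHA-256 digest. */
    static gcry_error_t
    ecdsa_sign_digest (gcry_sexp_t *r_sig, gcry_sexp_t skey,
                       const unsigned char *digest /* 32 bytes */)
    {
      gcry_sexp_t data;
      gcry_error_t err;

      err = gcry_sexp_build (&data, NULL,
                             "(data (flags rfc6979) (hash sha256 %b))",
                             32, digest);
      if (err)
        return err;
      err = gcry_pk_sign (r_sig, data, skey);
      gcry_sexp_release (data);
      return err;
    }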
+
+
+/* Verify an ECDSA signature.
+ * Check whether R and S verify INPUT.
+ */
+gpg_err_code_t
+_gcry_ecc_ecdsa_verify (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r, gcry_mpi_t s)
+{
+ gpg_err_code_t err = 0;
+ gcry_mpi_t hash, h, h1, h2, x;
+ mpi_point_struct Q, Q1, Q2;
+ unsigned int nbits;
+
+ if (!_gcry_mpi_ec_curve_point (ec->Q, ec))
+ return GPG_ERR_BROKEN_PUBKEY;
+
+ if( !(mpi_cmp_ui (r, 0) > 0 && mpi_cmp (r, ec->n) < 0) )
+ return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < n failed. */
+ if( !(mpi_cmp_ui (s, 0) > 0 && mpi_cmp (s, ec->n) < 0) )
+ return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < n failed. */
+
+ nbits = mpi_get_nbits (ec->n);
+ err = _gcry_dsa_normalize_hash (input, &hash, nbits);
+ if (err)
+ return err;
+
+ h = mpi_alloc (0);
+ h1 = mpi_alloc (0);
+ h2 = mpi_alloc (0);
+ x = mpi_alloc (0);
+ point_init (&Q);
+ point_init (&Q1);
+ point_init (&Q2);
+
+ /* h = s^(-1) (mod n) */
+ mpi_invm (h, s, ec->n);
+ /* h1 = hash * s^(-1) (mod n) */
+ mpi_mulm (h1, hash, h, ec->n);
+ /* Q1 = [ hash * s^(-1) ]G */
+ _gcry_mpi_ec_mul_point (&Q1, h1, ec->G, ec);
+ /* h2 = r * s^(-1) (mod n) */
+ mpi_mulm (h2, r, h, ec->n);
+ /* Q2 = [ r * s^(-1) ]Q */
+ _gcry_mpi_ec_mul_point (&Q2, h2, ec->Q, ec);
+ /* Q = ([hash * s^(-1)]G) + ([r * s^(-1)]Q) */
+ _gcry_mpi_ec_add_points (&Q, &Q1, &Q2, ec);
+
+ if (!mpi_cmp_ui (Q.z, 0))
+ {
+ if (DBG_CIPHER)
+ log_debug ("ecc verify: Rejected\n");
+ err = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+ if (_gcry_mpi_ec_get_affine (x, NULL, &Q, ec))
+ {
+ if (DBG_CIPHER)
+ log_debug ("ecc verify: Failed to get affine coordinates\n");
+ err = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+ mpi_mod (x, x, ec->n); /* x = x mod E_n */
+ if (mpi_cmp (x, r)) /* x != r */
+ {
+ if (DBG_CIPHER)
+ {
+ log_mpidump (" x", x);
+ log_mpidump (" r", r);
+ log_mpidump (" s", s);
+ }
+ err = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+
+ leave:
+ point_free (&Q2);
+ point_free (&Q1);
+ point_free (&Q);
+ mpi_free (x);
+ mpi_free (h2);
+ mpi_free (h1);
+ mpi_free (h);
+ if (hash != input)
+ mpi_free (hash);
+
+ return err;
+}
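A worked restatement of why the final comparison is sufficient: the signer produced s = k^(-1)*(h + d*r) mod n, so the verifier's point is (h*s^(-1))*G + (r*s^(-1))*Q = s^(-1)*(h + r*d)*G = k*G, because Q = d*G. Its affine x-coordinate reduced mod n is therefore exactly the r computed during signing, and any mismatch indicates an invalid signature.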
diff --git a/comm/third_party/libgcrypt/cipher/ecc-eddsa.c b/comm/third_party/libgcrypt/cipher/ecc-eddsa.c
new file mode 100644
index 0000000000..2a1a89073c
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/ecc-eddsa.c
@@ -0,0 +1,1182 @@
+/* ecc-eddsa.c - Elliptic Curve EdDSA signatures
+ * Copyright (C) 2013, 2014 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "ecc-common.h"
+
+
+
+void
+reverse_buffer (unsigned char *buffer, unsigned int length)
+{
+ unsigned int tmp, i;
+
+ for (i=0; i < length/2; i++)
+ {
+ tmp = buffer[i];
+ buffer[i] = buffer[length-1-i];
+ buffer[length-1-i] = tmp;
+ }
+}
+
+
+/* Helper to scan a hex string. */
+static gcry_mpi_t
+scanval (const char *string)
+{
+ gpg_err_code_t rc;
+ gcry_mpi_t val;
+
+ rc = _gcry_mpi_scan (&val, GCRYMPI_FMT_HEX, string, 0, NULL);
+ if (rc)
+ log_fatal ("scanning ECC parameter failed: %s\n", gpg_strerror (rc));
+ return val;
+}
+
+
+
+/* Encode MPI using the EdDSA scheme. MINLEN specifies the required
+ length of the buffer in bytes.  On success 0 is returned and a
+ malloced buffer with the encoded point is stored at R_BUFFER; the
+ length of this buffer is stored at R_BUFLEN. */
+static gpg_err_code_t
+eddsa_encodempi (gcry_mpi_t mpi, unsigned int nbits,
+ unsigned char **r_buffer, unsigned int *r_buflen)
+{
+ unsigned char *rawmpi;
+ unsigned int rawmpilen;
+ unsigned int minlen = (nbits%8) == 0 ? (nbits/8 + 1): (nbits+7)/8;
+
+ rawmpi = _gcry_mpi_get_buffer (mpi, minlen, &rawmpilen, NULL);
+ if (!rawmpi)
+ return gpg_err_code_from_syserror ();
+
+ *r_buffer = rawmpi;
+ *r_buflen = rawmpilen;
+ return 0;
+}
+
+
+/* Encode (X,Y) using the EdDSA scheme. NBITS is the number of bits
+ of the field of the curve. If WITH_PREFIX is set the returned
+ buffer is prefixed with a 0x40 byte. On success 0 is returned and
+ a malloced buffer with the encoded point is stored at R_BUFFER; the
+ length of this buffer is stored at R_BUFLEN. */
+static gpg_err_code_t
+eddsa_encode_x_y (gcry_mpi_t x, gcry_mpi_t y, unsigned int nbits,
+ int with_prefix,
+ unsigned char **r_buffer, unsigned int *r_buflen)
+{
+ unsigned char *rawmpi;
+ unsigned int rawmpilen;
+ int off = with_prefix? 1:0;
+ unsigned int minlen = (nbits%8) == 0 ? (nbits/8 + 1): (nbits+7)/8;
+
+ rawmpi = _gcry_mpi_get_buffer_extra (y, minlen, off?-1:0, &rawmpilen, NULL);
+ if (!rawmpi)
+ return gpg_err_code_from_syserror ();
+ if (mpi_test_bit (x, 0) && rawmpilen)
+ rawmpi[off + rawmpilen - 1] |= 0x80; /* Set sign bit. */
+ if (off)
+ rawmpi[0] = 0x40;
+
+ *r_buffer = rawmpi;
+ *r_buflen = rawmpilen + off;
+ return 0;
+}
+
+/* Encode POINT using the EdDSA scheme. X and Y are either scratch
+ variables supplied by the caller or NULL. CTX is the usual
+ context. If WITH_PREFIX is set the returned buffer is prefixed
+ with a 0x40 byte. On success 0 is returned and a malloced buffer
+ with the encoded point is stored at R_BUFFER; the length of this
+ buffer is stored at R_BUFLEN. */
+gpg_err_code_t
+_gcry_ecc_eddsa_encodepoint (mpi_point_t point, mpi_ec_t ec,
+ gcry_mpi_t x_in, gcry_mpi_t y_in,
+ int with_prefix,
+ unsigned char **r_buffer, unsigned int *r_buflen)
+{
+ gpg_err_code_t rc;
+ gcry_mpi_t x, y;
+
+ x = x_in? x_in : mpi_new (0);
+ y = y_in? y_in : mpi_new (0);
+
+ if (_gcry_mpi_ec_get_affine (x, y, point, ec))
+ {
+ log_error ("eddsa_encodepoint: Failed to get affine coordinates\n");
+ rc = GPG_ERR_INTERNAL;
+ }
+ else
+ rc = eddsa_encode_x_y (x, y, ec->nbits, with_prefix, r_buffer, r_buflen);
+
+ if (!x_in)
+ mpi_free (x);
+ if (!y_in)
+ mpi_free (y);
+ return rc;
+}
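In the resulting RFC 8032 wire format, y is serialized little-endian and the parity of x travels in the most significant bit of the final byte. A tiny hedged sketch of splitting those two pieces out of an encoded point ENC of LEN bytes (without the optional 0x40 prefix):

    /* Sketch: separate the x-parity bit from an RFC 8032 encoded point. */
    static void
    split_encoded_point (unsigned char *enc, unsigned int len, int *r_x_parity)
    {
      *r_x_parity = (enc[len - 1] & 0x80) != 0;  /* parity of x */
      enc[len - 1] &= 0x7f;                      /* what remains is y, LE */
    }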
+
+
+/* Make sure that the opaque MPI VALUE is in compact EdDSA format.
+ This function updates VALUE in place if needed.  */
+gpg_err_code_t
+_gcry_ecc_eddsa_ensure_compact (gcry_mpi_t value, unsigned int nbits)
+{
+ gpg_err_code_t rc;
+ const unsigned char *buf;
+ unsigned int rawmpilen;
+ gcry_mpi_t x, y;
+ unsigned char *enc;
+ unsigned int enclen;
+
+ if (!mpi_is_opaque (value))
+ return GPG_ERR_INV_OBJ;
+ buf = mpi_get_opaque (value, &rawmpilen);
+ if (!buf)
+ return GPG_ERR_INV_OBJ;
+ rawmpilen = (rawmpilen + 7)/8;
+
+ if (rawmpilen > 1 && (rawmpilen%2))
+ {
+ if (buf[0] == 0x04)
+ {
+ /* Buffer is in SEC1 uncompressed format. Extract y and
+ compress. */
+ rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG,
+ buf+1, (rawmpilen-1)/2, NULL);
+ if (rc)
+ return rc;
+ rc = _gcry_mpi_scan (&y, GCRYMPI_FMT_USG,
+ buf+1+(rawmpilen-1)/2, (rawmpilen-1)/2, NULL);
+ if (rc)
+ {
+ mpi_free (x);
+ return rc;
+ }
+
+ rc = eddsa_encode_x_y (x, y, nbits, 0, &enc, &enclen);
+ mpi_free (x);
+ mpi_free (y);
+ if (rc)
+ return rc;
+
+ mpi_set_opaque (value, enc, 8*enclen);
+ }
+ else if (buf[0] == 0x40)
+ {
+ /* Buffer is compressed but with our SEC1 alike compression
+ indicator. Remove that byte. FIXME: We should write and
+ use a function to manipulate an opaque MPI in place. */
+ if (!_gcry_mpi_set_opaque_copy (value, buf + 1, (rawmpilen - 1)*8))
+ return gpg_err_code_from_syserror ();
+ }
+ }
+
+ return 0;
+}
+
+
+static gpg_err_code_t
+ecc_ed448_recover_x (gcry_mpi_t x, gcry_mpi_t y, int x_0, mpi_ec_t ec)
+{
+ gpg_err_code_t rc = 0;
+ gcry_mpi_t u, v, u3, v3, t;
+ static gcry_mpi_t p34; /* Hard coded (P-3)/4 */
+
+ if (mpi_cmp (y, ec->p) >= 0)
+ rc = GPG_ERR_INV_OBJ;
+
+ if (!p34)
+ p34 = scanval ("3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
+ "BFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF");
+
+ u = mpi_new (0);
+ v = mpi_new (0);
+ u3 = mpi_new (0);
+ v3 = mpi_new (0);
+ t = mpi_new (0);
+
+ /* Compute u and v */
+ /* u = y^2 */
+ mpi_mulm (u, y, y, ec->p);
+ /* v = b*y^2 */
+ mpi_mulm (v, ec->b, u, ec->p);
+ /* u = y^2-1 */
+ mpi_sub_ui (u, u, 1);
+ /* v = b*y^2-1 */
+ mpi_sub_ui (v, v, 1);
+
+ /* Compute sqrt(u/v) */
+ /* u3 = u^3 */
+ mpi_powm (u3, u, mpi_const (MPI_C_THREE), ec->p);
+ mpi_powm (v3, v, mpi_const (MPI_C_THREE), ec->p);
+ /* t = u^4 * u * v3 = u^5 * v^3 */
+ mpi_powm (t, u, mpi_const (MPI_C_FOUR), ec->p);
+ mpi_mulm (t, t, u, ec->p);
+ mpi_mulm (t, t, v3, ec->p);
+ /* t = t^((p-3)/4) = (u^5 * v^3)^((p-3)/4) */
+ mpi_powm (t, t, p34, ec->p);
+ /* x = t * u^3 * v = (u^3 * v) * (u^5 * v^3)^((p-3)/4) */
+ mpi_mulm (t, t, u3, ec->p);
+ mpi_mulm (x, t, v, ec->p);
+
+ /* t = v * x^2 */
+ mpi_mulm (t, x, x, ec->p);
+ mpi_mulm (t, t, v, ec->p);
+
+ if (mpi_cmp (t, u) != 0)
+ rc = GPG_ERR_INV_OBJ;
+ else
+ {
+ if (!mpi_cmp_ui (x, 0) && x_0)
+ rc = GPG_ERR_INV_OBJ;
+
+ /* Choose the desired square root according to parity */
+ if (mpi_test_bit (x, 0) != !!x_0)
+ mpi_sub (x, ec->p, x);
+ }
+
+ mpi_free (t);
+ mpi_free (u3);
+ mpi_free (v3);
+ mpi_free (v);
+ mpi_free (u);
+
+ return rc;
+}
+
+
+/* Recover X from Y and SIGN (which actually is a parity bit). */
+gpg_err_code_t
+_gcry_ecc_eddsa_recover_x (gcry_mpi_t x, gcry_mpi_t y, int sign, mpi_ec_t ec)
+{
+ gpg_err_code_t rc = 0;
+ gcry_mpi_t u, v, v3, t;
+ static gcry_mpi_t p58, seven;
+
+ /*
+ * This routine is actually curve specific.  Currently only Ed25519
+ * and Ed448 are supported.
+ */
+
+ if (ec->dialect != ECC_DIALECT_ED25519)
+ /* For now, it's only Ed448. */
+ return ecc_ed448_recover_x (x, y, sign, ec);
+
+ /* It's Ed25519. */
+
+ if (!p58)
+ p58 = scanval ("0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFD");
+ if (!seven)
+ seven = mpi_set_ui (NULL, 7);
+
+ u = mpi_new (0);
+ v = mpi_new (0);
+ v3 = mpi_new (0);
+ t = mpi_new (0);
+
+ /* Compute u and v */
+ /* u = y^2 */
+ mpi_mulm (u, y, y, ec->p);
+ /* v = b*y^2 */
+ mpi_mulm (v, ec->b, u, ec->p);
+ /* u = y^2-1 */
+ mpi_sub_ui (u, u, 1);
+ /* v = b*y^2+1 */
+ mpi_add_ui (v, v, 1);
+
+ /* Compute sqrt(u/v) */
+ /* v3 = v^3 */
+ mpi_powm (v3, v, mpi_const (MPI_C_THREE), ec->p);
+ /* t = v3 * v3 * u * v = u * v^7 */
+ mpi_powm (t, v, seven, ec->p);
+ mpi_mulm (t, t, u, ec->p);
+ /* t = t^((p-5)/8) = (u * v^7)^((p-5)/8) */
+ mpi_powm (t, t, p58, ec->p);
+ /* x = t * u * v^3 = (u * v^3) * (u * v^7)^((p-5)/8) */
+ mpi_mulm (t, t, u, ec->p);
+ mpi_mulm (x, t, v3, ec->p);
+
+ /* Adjust if needed. */
+ /* t = v * x^2 */
+ mpi_mulm (t, x, x, ec->p);
+ mpi_mulm (t, t, v, ec->p);
+ /* -t == u ? x = x * sqrt(-1) */
+ mpi_sub (t, ec->p, t);
+ if (!mpi_cmp (t, u))
+ {
+ static gcry_mpi_t m1; /* Fixme: this is not thread-safe. */
+ if (!m1)
+ m1 = scanval ("2B8324804FC1DF0B2B4D00993DFBD7A7"
+ "2F431806AD2FE478C4EE1B274A0EA0B0");
+ mpi_mulm (x, x, m1, ec->p);
+ /* t = v * x^2 */
+ mpi_mulm (t, x, x, ec->p);
+ mpi_mulm (t, t, v, ec->p);
+ /* -t == u ? x = x * sqrt(-1) */
+ mpi_sub (t, ec->p, t);
+ if (!mpi_cmp (t, u))
+ rc = GPG_ERR_INV_OBJ;
+ }
+
+ /* Choose the desired square root according to parity */
+ if (mpi_test_bit (x, 0) != !!sign)
+ mpi_sub (x, ec->p, x);
+
+ mpi_free (t);
+ mpi_free (v3);
+ mpi_free (v);
+ mpi_free (u);
+
+ return rc;
+}
+
+
+/* Decode the EdDSA style encoded PK and set it into RESULT. CTX is
+ the usual curve context. If R_ENCPK is not NULL, the encoded PK is
+ stored at that address; this is a new copy to be released by the
+ caller. In contrast to the supplied PK, this is not an MPI and
+ thus guaranteed to be properly padded. R_ENCPKLEN receives the
+ length of that encoded key. */
+gpg_err_code_t
+_gcry_ecc_eddsa_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result,
+ unsigned char **r_encpk, unsigned int *r_encpklen)
+{
+ gpg_err_code_t rc;
+ unsigned char *rawmpi;
+ unsigned int rawmpilen;
+ int sign;
+
+ if (mpi_is_opaque (pk))
+ {
+ const unsigned char *buf;
+ unsigned int len;
+
+ len = (ctx->nbits%8) == 0 ? (ctx->nbits/8 + 1): (ctx->nbits+7)/8;
+
+ buf = mpi_get_opaque (pk, &rawmpilen);
+ if (!buf)
+ return GPG_ERR_INV_OBJ;
+ rawmpilen = (rawmpilen + 7)/8;
+
+ if (!(rawmpilen == len
+ || rawmpilen == len + 1
+ || rawmpilen == len * 2 + 1))
+ return GPG_ERR_INV_OBJ;
+
+ /* Handle compression prefixes. The size of the buffer will be
+ odd in this case. */
+ if (rawmpilen > 1 && (rawmpilen == len + 1 || rawmpilen == len * 2 + 1))
+ {
+ /* First check whether the public key has been given in
+ standard uncompressed format (SEC1). No need to recover
+ x in this case. */
+ if (buf[0] == 0x04)
+ {
+ gcry_mpi_t x, y;
+
+ rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG,
+ buf+1, (rawmpilen-1)/2, NULL);
+ if (rc)
+ return rc;
+ rc = _gcry_mpi_scan (&y, GCRYMPI_FMT_USG,
+ buf+1+(rawmpilen-1)/2, (rawmpilen-1)/2,NULL);
+ if (rc)
+ {
+ mpi_free (x);
+ return rc;
+ }
+
+ if (r_encpk)
+ {
+ rc = eddsa_encode_x_y (x, y, ctx->nbits, 0,
+ r_encpk, r_encpklen);
+ if (rc)
+ {
+ mpi_free (x);
+ mpi_free (y);
+ return rc;
+ }
+ }
+ mpi_snatch (result->x, x);
+ mpi_snatch (result->y, y);
+ mpi_set_ui (result->z, 1);
+ return 0;
+ }
+
+ /* Check whether the public key has been prefixed with a 0x40
+ byte to explicitly indicate compressed format using a SEC1
+ alike prefix byte. This is a Libgcrypt extension. */
+ if (buf[0] == 0x40)
+ {
+ rawmpilen--;
+ buf++;
+ }
+ }
+
+ /* EdDSA compressed point. */
+ rawmpi = xtrymalloc (rawmpilen);
+ if (!rawmpi)
+ return gpg_err_code_from_syserror ();
+ memcpy (rawmpi, buf, rawmpilen);
+ reverse_buffer (rawmpi, rawmpilen);
+ }
+ else
+ {
+ /* Note: Without using an opaque MPI it is not reliably possible
+ to find out whether the public key has been given in
+ uncompressed format. Thus we expect native EdDSA format. */
+ rawmpi = _gcry_mpi_get_buffer (pk, (ctx->nbits+7)/8, &rawmpilen, NULL);
+ if (!rawmpi)
+ return gpg_err_code_from_syserror ();
+ }
+
+ if (rawmpilen)
+ {
+ sign = !!(rawmpi[0] & 0x80);
+ rawmpi[0] &= 0x7f;
+ }
+ else
+ sign = 0;
+ _gcry_mpi_set_buffer (result->y, rawmpi, rawmpilen, 0);
+ if (r_encpk)
+ {
+ /* Revert to little endian. */
+ if (sign && rawmpilen)
+ rawmpi[0] |= 0x80;
+ reverse_buffer (rawmpi, rawmpilen);
+ *r_encpk = rawmpi;
+ if (r_encpklen)
+ *r_encpklen = rawmpilen;
+ }
+ else
+ xfree (rawmpi);
+
+ rc = _gcry_ecc_eddsa_recover_x (result->x, result->y, sign, ctx);
+ mpi_set_ui (result->z, 1);
+
+ return rc;
+}
+
+
+/* Compute the A value as used by EdDSA.  The caller needs to provide
+ the context EC with the secret D stored as an MPI.  On success a
+ newly allocated buffer of twice the (padded) field size (64 bytes
+ for Ed25519) is stored at R_DIGEST; the first half represents the A
+ value.  On error an error code is returned and NULL is stored at
+ R_DIGEST.  */
+gpg_err_code_t
+_gcry_ecc_eddsa_compute_h_d (unsigned char **r_digest, mpi_ec_t ec)
+{
+ gpg_err_code_t rc;
+ unsigned char *rawmpi = NULL;
+ unsigned int rawmpilen;
+ unsigned char *digest;
+ int hashalgo, b;
+
+ *r_digest = NULL;
+
+ b = (ec->nbits+7)/8;
+
+ /*
+ * Choice of hashalgo is curve specific.
+ * For now, it's determined by the bit size of the field.
+ */
+ if (ec->nbits == 255)
+ hashalgo = GCRY_MD_SHA512;
+ else if (ec->nbits == 448)
+ {
+ b++;
+ hashalgo = GCRY_MD_SHAKE256;
+ }
+ else
+ return GPG_ERR_NOT_IMPLEMENTED;
+
+ /* Note that we clear DIGEST so we can use it as input to left pad
+ the key with zeroes for hashing. */
+ digest = xtrycalloc_secure (2, b);
+ if (!digest)
+ return gpg_err_code_from_syserror ();
+
+ rawmpi = _gcry_mpi_get_buffer (ec->d, 0, &rawmpilen, NULL);
+ if (!rawmpi)
+ {
+ xfree (digest);
+ return gpg_err_code_from_syserror ();
+ }
+
+ if (hashalgo == GCRY_MD_SHAKE256)
+ {
+ gcry_error_t err;
+ gcry_md_hd_t hd;
+
+ err = _gcry_md_open (&hd, hashalgo, 0);
+ if (err)
+ rc = gcry_err_code (err);
+ else
+ {
+ _gcry_md_write (hd, rawmpi, rawmpilen);
+ _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0);
+ _gcry_md_extract (hd, GCRY_MD_SHAKE256, digest, 2*b);
+ _gcry_md_close (hd);
+ rc = 0;
+ }
+ }
+ else
+ {
+ gcry_buffer_t hvec[2];
+
+ memset (hvec, 0, sizeof hvec);
+
+ hvec[0].data = digest;
+ hvec[0].len = b > rawmpilen? b - rawmpilen : 0;
+ hvec[1].data = rawmpi;
+ hvec[1].len = rawmpilen;
+ rc = _gcry_md_hash_buffers (hashalgo, 0, digest, hvec, 2);
+ }
+
+ xfree (rawmpi);
+ if (rc)
+ {
+ xfree (digest);
+ return rc;
+ }
+
+ /* Compute the A value. */
+ reverse_buffer (digest, b); /* Only the first half of the hash. */
+
+ /* Field specific handling of clearing/setting bits. */
+ if (ec->nbits == 255)
+ {
+ digest[0] = (digest[0] & 0x7f) | 0x40;
+ digest[31] &= 0xf8;
+ }
+ else
+ {
+ digest[0] = 0;
+ digest[1] |= 0x80;
+ digest[56] &= 0xfc;
+ }
+
+ *r_digest = digest;
+ return 0;
+}
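The bit fiddling above is applied to a byte-reversed (big-endian) copy of the hash; expressed directly on the little-endian secret scalar, it is the usual RFC 8032 Ed25519 clamping, sketched here for reference:

    /* Sketch: RFC 8032 clamping of a 32-byte little-endian Ed25519 scalar. */
    static void
    clamp_ed25519_scalar (unsigned char a[32])
    {
      a[0]  &= 0xf8;   /* clear the three lowest bits */
      a[31] &= 0x7f;   /* clear the highest bit       */
      a[31] |= 0x40;   /* set the second-highest bit  */
    }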
+
+
+/**
+ * _gcry_ecc_eddsa_genkey - EdDSA version of the key generation.
+ *
+ * @ec: Elliptic curve computation context.
+ * @flags: Flags controlling aspects of the creation.
+ *
+ * Return: An error code.
+ *
+ * The only @flags bit used by this function is %PUBKEY_FLAG_TRANSIENT_KEY
+ * to use a faster RNG.
+ */
+gpg_err_code_t
+_gcry_ecc_eddsa_genkey (mpi_ec_t ec, int flags)
+{
+ gpg_err_code_t rc;
+ int b;
+ gcry_mpi_t a, x, y;
+ mpi_point_struct Q;
+ gcry_random_level_t random_level;
+ char *dbuf;
+ size_t dlen;
+ unsigned char *hash_d = NULL;
+
+ point_init (&Q);
+
+ if ((flags & PUBKEY_FLAG_TRANSIENT_KEY))
+ random_level = GCRY_STRONG_RANDOM;
+ else
+ random_level = GCRY_VERY_STRONG_RANDOM;
+
+ b = (ec->nbits+7)/8;
+
+ if (ec->nbits == 255)
+ ;
+ else if (ec->nbits == 448)
+ b++;
+ else
+ return GPG_ERR_NOT_IMPLEMENTED;
+
+ dlen = b;
+
+ a = mpi_snew (0);
+ x = mpi_new (0);
+ y = mpi_new (0);
+
+ /* Generate a secret. */
+ dbuf = _gcry_random_bytes_secure (dlen, random_level);
+ ec->d = _gcry_mpi_set_opaque (NULL, dbuf, dlen*8);
+ rc = _gcry_ecc_eddsa_compute_h_d (&hash_d, ec);
+ if (rc)
+ goto leave;
+
+ _gcry_mpi_set_buffer (a, hash_d, b, 0);
+ xfree (hash_d);
+ /* log_printmpi ("ecgen a", a); */
+
+ /* Compute Q. */
+ _gcry_mpi_ec_mul_point (&Q, a, ec->G, ec);
+ if (DBG_CIPHER)
+ log_printpnt ("ecgen pk", &Q, ec);
+
+ ec->Q = mpi_point_snatch_set (NULL, Q.x, Q.y, Q.z);
+ Q.x = NULL;
+ Q.y = NULL;
+ Q.z = NULL;
+
+ leave:
+ _gcry_mpi_release (a);
+ _gcry_mpi_release (x);
+ _gcry_mpi_release (y);
+ return rc;
+}
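Through the public interface this corresponds to gcry_pk_genkey with the eddsa flag. A minimal sketch generating an Ed25519 key pair:

    #include <gcrypt.h>

    /* Sketch: generate an Ed25519 key pair as an S-expression. */
    static gcry_error_t
    gen_ed25519_key (gcry_sexp_t *r_key)
    {
      gcry_sexp_t parms;
      gcry_error_t err;

      err = gcry_sexp_build (&parms, NULL,
                             "(genkey (ecc (curve Ed25519) (flags eddsa)))");
      if (err)
        return err;
      err = gcry_pk_genkey (r_key, parms);
      gcry_sexp_release (parms);
      return err;
    }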
+
+
+/* Compute an EdDSA signature. See:
+ * [ed25519] 23pp. (PDF) Daniel J. Bernstein, Niels Duif, Tanja
+ * Lange, Peter Schwabe, Bo-Yin Yang. High-speed high-security
+ * signatures. Journal of Cryptographic Engineering 2 (2012), 77-89.
+ * Document ID: a1a62a2f76d23f65d622484ddd09caf8.
+ * URL: http://cr.yp.to/papers.html#ed25519. Date: 2011.09.26.
+ *
+ * Although this function requires the specification of a hash
+ * algorithm, only the algorithm specified by the paper is supported.
+ * This may change in the future.
+ *
+ * Return the signature struct (r,s) from the message hash. The caller
+ * must have allocated R_R and S.
+ */
+
+/* Domain separation strings used for Ed25519 (dom2) and Ed448 (dom4). */
+#define DOM25519 "SigEd25519 no Ed25519 collisions"
+#define DOM25519_LEN 32
+#define DOM448 "SigEd448"
+#define DOM448_LEN 8
+
+gpg_err_code_t
+_gcry_ecc_eddsa_sign (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r_r, gcry_mpi_t s,
+ struct pk_encoding_ctx *ctx)
+{
+ int rc;
+ unsigned int tmp;
+ unsigned char *digest = NULL;
+ const void *mbuf;
+ size_t mlen;
+ unsigned char *rawmpi = NULL;
+ unsigned int rawmpilen;
+ unsigned char *encpk = NULL; /* Encoded public key. */
+ unsigned int encpklen;
+ mpi_point_struct I; /* Intermediate value. */
+ gcry_mpi_t a, x, y, r;
+ int b;
+ unsigned char x_olen[2];
+ unsigned char prehashed_msg[64];
+
+ b = (ec->nbits+7)/8;
+
+ if (ec->nbits == 255)
+ ;
+ else if (ec->nbits == 448)
+ b++;
+ else
+ return GPG_ERR_NOT_IMPLEMENTED;
+
+ if (!mpi_is_opaque (input))
+ return GPG_ERR_INV_DATA;
+
+ /* Initialize some helpers. */
+ point_init (&I);
+ a = mpi_snew (0);
+ x = mpi_new (0);
+ y = mpi_new (0);
+ r = mpi_snew (0);
+
+ rc = _gcry_ecc_eddsa_compute_h_d (&digest, ec);
+ if (rc)
+ goto leave;
+ _gcry_mpi_set_buffer (a, digest, b, 0);
+
+ /* Compute the public key if it's not available (only secret part). */
+ if (ec->Q == NULL)
+ {
+ mpi_point_struct Q;
+
+ point_init (&Q);
+ _gcry_mpi_ec_mul_point (&Q, a, ec->G, ec);
+ ec->Q = mpi_point_snatch_set (NULL, Q.x, Q.y, Q.z);
+ }
+ rc = _gcry_ecc_eddsa_encodepoint (ec->Q, ec, x, y, 0, &encpk, &encpklen);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_printhex (" e_pk", encpk, encpklen);
+
+ /* Compute R. */
+ mbuf = mpi_get_opaque (input, &tmp);
+ mlen = (tmp +7)/8;
+ if (DBG_CIPHER)
+ log_printhex (" m", mbuf, mlen);
+
+ if (ctx->hash_algo == GCRY_MD_SHAKE256)
+ {
+ gcry_error_t err;
+ gcry_md_hd_t hd;
+
+ err = _gcry_md_open (&hd, ctx->hash_algo, 0);
+ if (err)
+ rc = gcry_err_code (err);
+ else
+ {
+ _gcry_md_write (hd, DOM448, DOM448_LEN);
+ x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH);
+ x_olen[1] = ctx->labellen;
+ _gcry_md_write (hd, x_olen, 2);
+ if (ctx->labellen)
+ _gcry_md_write (hd, ctx->label, ctx->labellen);
+ _gcry_md_write (hd, digest+b, b);
+ if ((ctx->flags & PUBKEY_FLAG_PREHASH))
+ {
+ gcry_md_hd_t hd2;
+
+ err = _gcry_md_open (&hd2, ctx->hash_algo, 0);
+ if (err)
+ {
+ rc = gcry_err_code (err);
+ _gcry_md_close (hd);
+ goto leave;
+ }
+ _gcry_md_write (hd2, mbuf, mlen);
+ _gcry_md_ctl (hd2, GCRYCTL_FINALIZE, NULL, 0);
+ _gcry_md_extract (hd2, GCRY_MD_SHAKE256, prehashed_msg, 64);
+ _gcry_md_close (hd2);
+ _gcry_md_write (hd, prehashed_msg, 64);
+ }
+ else
+ _gcry_md_write (hd, mbuf, mlen);
+ _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0);
+ _gcry_md_extract (hd, GCRY_MD_SHAKE256, digest, 2*b);
+ _gcry_md_close (hd);
+ rc = 0;
+ }
+ }
+ else
+ {
+ gcry_buffer_t hvec[6];
+ int i = 0;
+
+ memset (hvec, 0, sizeof hvec);
+
+ if ((ctx->flags & PUBKEY_FLAG_PREHASH) || ctx->labellen)
+ {
+ hvec[i].data = (void *)DOM25519;
+ hvec[i].len = DOM25519_LEN;
+ i++;
+ x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH);
+ x_olen[1] = ctx->labellen;
+ hvec[i].data = x_olen;
+ hvec[i].len = 2;
+ i++;
+ if (ctx->labellen)
+ {
+ hvec[i].data = ctx->label;
+ hvec[i].len = ctx->labellen;
+ i++;
+ }
+ }
+
+ hvec[i].data = digest;
+ hvec[i].off = b;
+ hvec[i].len = b;
+ i++;
+ if ((ctx->flags & PUBKEY_FLAG_PREHASH))
+ {
+ _gcry_md_hash_buffer (ctx->hash_algo, prehashed_msg, mbuf, mlen);
+ hvec[i].data = (char*)prehashed_msg;
+ hvec[i].len = 64;
+ }
+ else
+ {
+ hvec[i].data = (char*)mbuf;
+ hvec[i].len = mlen;
+ }
+ i++;
+ rc = _gcry_md_hash_buffers (ctx->hash_algo, 0, digest, hvec, i);
+ }
+
+ if (rc)
+ goto leave;
+ reverse_buffer (digest, 2*b);
+ if (DBG_CIPHER)
+ log_printhex (" r", digest, 2*b);
+ _gcry_mpi_set_buffer (r, digest, 2*b, 0);
+ mpi_mod (r, r, ec->n);
+ _gcry_mpi_ec_mul_point (&I, r, ec->G, ec);
+ if (DBG_CIPHER)
+ log_printpnt (" r", &I, ec);
+
+ /* Convert R into affine coordinates and apply encoding. */
+ rc = _gcry_ecc_eddsa_encodepoint (&I, ec, x, y, 0, &rawmpi, &rawmpilen);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_printhex (" e_r", rawmpi, rawmpilen);
+
+ if (ctx->hash_algo == GCRY_MD_SHAKE256)
+ {
+ gcry_error_t err;
+ gcry_md_hd_t hd;
+
+ err = _gcry_md_open (&hd, ctx->hash_algo, 0);
+ if (err)
+ rc = gcry_err_code (err);
+ else
+ {
+ _gcry_md_write (hd, DOM448, DOM448_LEN);
+ x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH);
+ x_olen[1] = ctx->labellen;
+ _gcry_md_write (hd, x_olen, 2);
+ if (ctx->labellen)
+ _gcry_md_write (hd, ctx->label, ctx->labellen);
+ _gcry_md_write (hd, rawmpi, rawmpilen);
+ _gcry_md_write (hd, encpk, encpklen);
+ if ((ctx->flags & PUBKEY_FLAG_PREHASH))
+ _gcry_md_write (hd, prehashed_msg, 64);
+ else
+ _gcry_md_write (hd, mbuf, mlen);
+ _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0);
+ _gcry_md_extract (hd, GCRY_MD_SHAKE256, digest, 2*b);
+ _gcry_md_close (hd);
+ rc = 0;
+ }
+ }
+ else
+ {
+ gcry_buffer_t hvec[6];
+ int i = 0;
+
+ memset (hvec, 0, sizeof hvec);
+
+ if ((ctx->flags & PUBKEY_FLAG_PREHASH) || ctx->labellen)
+ {
+ hvec[i].data = (void *)DOM25519;
+ hvec[i].len = DOM25519_LEN;
+ i++;
+ x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH);
+ x_olen[1] = ctx->labellen;
+ hvec[i].data = x_olen;
+ hvec[i].len = 2;
+ i++;
+ if (ctx->labellen)
+ {
+ hvec[i].data = ctx->label;
+ hvec[i].len = ctx->labellen;
+ i++;
+ }
+ }
+
+ /* S = r + a * H(dom2(F,C)+encodepoint(R)+encodepoint(pk)+m) mod n */
+ hvec[i].data = rawmpi; /* (this is R) */
+ hvec[i].len = rawmpilen;
+ i++;
+ hvec[i].data = encpk;
+ hvec[i].len = encpklen;
+ i++;
+ if ((ctx->flags & PUBKEY_FLAG_PREHASH))
+ {
+ hvec[i].data = (char*)prehashed_msg;
+ hvec[i].len = 64;
+ }
+ else
+ {
+ hvec[i].data = (char*)mbuf;
+ hvec[i].len = mlen;
+ }
+ i++;
+ rc = _gcry_md_hash_buffers (ctx->hash_algo, 0, digest, hvec, i);
+ }
+
+ if (rc)
+ goto leave;
+
+ /* No more need for RAWMPI thus we now transfer it to R_R. */
+ mpi_set_opaque (r_r, rawmpi, rawmpilen*8);
+ rawmpi = NULL;
+
+ reverse_buffer (digest, 2*b);
+ if (DBG_CIPHER)
+ log_printhex (" H(R+)", digest, 2*b);
+ _gcry_mpi_set_buffer (s, digest, 2*b, 0);
+ mpi_mulm (s, s, a, ec->n);
+ mpi_addm (s, s, r, ec->n);
+ rc = eddsa_encodempi (s, ec->nbits, &rawmpi, &rawmpilen);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_printhex (" e_s", rawmpi, rawmpilen);
+ mpi_set_opaque (s, rawmpi, rawmpilen*8);
+ rawmpi = NULL;
+
+ rc = 0;
+
+ leave:
+ _gcry_mpi_release (a);
+ _gcry_mpi_release (x);
+ _gcry_mpi_release (y);
+ _gcry_mpi_release (r);
+ xfree (digest);
+ point_free (&I);
+ xfree (encpk);
+ xfree (rawmpi);
+ return rc;
+}
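Callers reach this through gcry_pk_sign with a data S-expression carrying the eddsa flag; note that the full message is passed, not a digest. A hedged sketch for Ed25519 (SKEY as produced by the key-generation sketch above); verification uses the same data object with gcry_pk_verify:

    #include <gcrypt.h>

    /* Sketch: PureEdDSA (Ed25519) signature over MSG of MSGLEN bytes. */
    static gcry_error_t
    ed25519_sign (gcry_sexp_t *r_sig, gcry_sexp_t skey,
                  const void *msg, size_t msglen)
    {
      gcry_sexp_t data;
      gcry_error_t err;

      err = gcry_sexp_build (&data, NULL,
                             "(data (flags eddsa) (hash-algo sha512) (value %b))",
                             (int)msglen, msg);
      if (err)
        return err;
      err = gcry_pk_sign (r_sig, data, skey);
      gcry_sexp_release (data);
      return err;
    }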
+
+
+/* Verify an EdDSA signature.  See _gcry_ecc_eddsa_sign for references.
+ * Check whether R_IN and S_IN verify INPUT.
+ */
+gpg_err_code_t
+_gcry_ecc_eddsa_verify (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r_in, gcry_mpi_t s_in,
+ struct pk_encoding_ctx *ctx)
+{
+ int rc;
+ int b;
+ unsigned int tmp;
+ unsigned char *encpk = NULL; /* Encoded public key. */
+ unsigned int encpklen;
+ const void *mbuf, *rbuf;
+ unsigned char *tbuf = NULL;
+ size_t mlen, rlen;
+ unsigned int tlen;
+ unsigned char digest[114];
+ gcry_mpi_t h, s;
+ mpi_point_struct Ia, Ib;
+ unsigned char x_olen[2];
+ unsigned char prehashed_msg[64];
+
+ if (!mpi_is_opaque (input) || !mpi_is_opaque (r_in) || !mpi_is_opaque (s_in))
+ return GPG_ERR_INV_DATA;
+
+ point_init (&Ia);
+ point_init (&Ib);
+ h = mpi_new (0);
+ s = mpi_new (0);
+
+ b = (ec->nbits+7)/8;
+
+ if (ec->nbits == 255)
+ ;
+ else if (ec->nbits == 448)
+ b++;
+ else
+ return GPG_ERR_NOT_IMPLEMENTED;
+
+ /* Encode and check the public key. */
+ rc = _gcry_ecc_eddsa_encodepoint (ec->Q, ec, NULL, NULL, 0,
+ &encpk, &encpklen);
+ if (rc)
+ goto leave;
+ if (!_gcry_mpi_ec_curve_point (ec->Q, ec))
+ {
+ rc = GPG_ERR_BROKEN_PUBKEY;
+ goto leave;
+ }
+ if (DBG_CIPHER)
+ log_printhex (" e_pk", encpk, encpklen);
+ if (encpklen != b)
+ {
+ rc = GPG_ERR_INV_LENGTH;
+ goto leave;
+ }
+
+ /* Convert the other input parameters. */
+ mbuf = mpi_get_opaque (input, &tmp);
+ mlen = (tmp +7)/8;
+ if (DBG_CIPHER)
+ log_printhex (" m", mbuf, mlen);
+ rbuf = mpi_get_opaque (r_in, &tmp);
+ rlen = (tmp +7)/8;
+ if (DBG_CIPHER)
+ log_printhex (" r", rbuf, rlen);
+ if (rlen != b)
+ {
+ rc = GPG_ERR_INV_LENGTH;
+ goto leave;
+ }
+
+ if (ctx->hash_algo == GCRY_MD_SHAKE256)
+ {
+ gcry_error_t err;
+ gcry_md_hd_t hd;
+
+ err = _gcry_md_open (&hd, ctx->hash_algo, 0);
+ if (err)
+ rc = gcry_err_code (err);
+ else
+ {
+ _gcry_md_write (hd, DOM448, DOM448_LEN);
+ x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH);
+ x_olen[1] = ctx->labellen;
+ _gcry_md_write (hd, x_olen, 2);
+ if (ctx->labellen)
+ _gcry_md_write (hd, ctx->label, ctx->labellen);
+ _gcry_md_write (hd, rbuf, rlen);
+ _gcry_md_write (hd, encpk, encpklen);
+ if ((ctx->flags & PUBKEY_FLAG_PREHASH))
+ {
+ gcry_md_hd_t hd2;
+
+ err = _gcry_md_open (&hd2, ctx->hash_algo, 0);
+ if (err)
+ {
+ rc = gcry_err_code (err);
+ _gcry_md_close (hd);
+ goto leave;
+ }
+ _gcry_md_write (hd2, mbuf, mlen);
+ _gcry_md_ctl (hd2, GCRYCTL_FINALIZE, NULL, 0);
+ _gcry_md_extract (hd2, GCRY_MD_SHAKE256, prehashed_msg, 64);
+ _gcry_md_close (hd2);
+ _gcry_md_write (hd, prehashed_msg, 64);
+ }
+ else
+ _gcry_md_write (hd, mbuf, mlen);
+ _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0);
+ _gcry_md_extract (hd, GCRY_MD_SHAKE256, digest, 2*b);
+ _gcry_md_close (hd);
+ rc = 0;
+ }
+ }
+ else
+ {
+ gcry_buffer_t hvec[6];
+ int i = 0;
+
+ memset (hvec, 0, sizeof hvec);
+
+ /* h = H(dom2(F,C)+encodepoint(R)+encodepoint(pk)+m) */
+ if ((ctx->flags & PUBKEY_FLAG_PREHASH) || ctx->labellen)
+ {
+ hvec[i].data = (void *)DOM25519;
+ hvec[i].len = DOM25519_LEN;
+ i++;
+ x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH);
+ x_olen[1] = ctx->labellen;
+ hvec[i].data = x_olen;
+ hvec[i].len = 2;
+ i++;
+ if (ctx->labellen)
+ {
+ hvec[i].data = ctx->label;
+ hvec[i].len = ctx->labellen;
+ i++;
+ }
+ }
+
+ hvec[i].data = (char*)rbuf;
+ hvec[i].len = rlen;
+ i++;
+ hvec[i].data = encpk;
+ hvec[i].len = encpklen;
+ i++;
+ if ((ctx->flags & PUBKEY_FLAG_PREHASH))
+ {
+ _gcry_md_hash_buffer (ctx->hash_algo, prehashed_msg, mbuf, mlen);
+ hvec[i].data = (char*)prehashed_msg;
+ hvec[i].len = 64;
+ }
+ else
+ {
+ hvec[i].data = (char*)mbuf;
+ hvec[i].len = mlen;
+ }
+ i++;
+ rc = _gcry_md_hash_buffers (ctx->hash_algo, 0, digest, hvec, i);
+ }
+
+ if (rc)
+ goto leave;
+ reverse_buffer (digest, 2*b);
+ if (DBG_CIPHER)
+ log_printhex (" H(R+)", digest, 2*b);
+ _gcry_mpi_set_buffer (h, digest, 2*b, 0);
+
+ /* According to the paper the best way for verification is:
+ encodepoint(sG - h·Q) = encodepoint(r)
+ because we don't need to decode R. */
+ {
+ void *sbuf;
+ unsigned int slen;
+
+ sbuf = _gcry_mpi_get_opaque_copy (s_in, &tmp);
+ slen = (tmp +7)/8;
+ reverse_buffer (sbuf, slen);
+ if (DBG_CIPHER)
+ log_printhex (" s", sbuf, slen);
+ _gcry_mpi_set_buffer (s, sbuf, slen, 0);
+ xfree (sbuf);
+ if (slen != b)
+ {
+ rc = GPG_ERR_INV_LENGTH;
+ goto leave;
+ }
+ }
+
+ _gcry_mpi_ec_mul_point (&Ia, s, ec->G, ec);
+ _gcry_mpi_ec_mul_point (&Ib, h, ec->Q, ec);
+ _gcry_mpi_sub (Ib.x, ec->p, Ib.x);
+ _gcry_mpi_ec_add_points (&Ia, &Ia, &Ib, ec);
+ rc = _gcry_ecc_eddsa_encodepoint (&Ia, ec, s, h, 0, &tbuf, &tlen);
+ if (rc)
+ goto leave;
+ if (tlen != rlen || memcmp (tbuf, rbuf, tlen))
+ {
+ rc = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+
+ rc = 0;
+
+ leave:
+ xfree (encpk);
+ xfree (tbuf);
+ _gcry_mpi_release (s);
+ _gcry_mpi_release (h);
+ point_free (&Ia);
+ point_free (&Ib);
+ return rc;
+}
diff --git a/comm/third_party/libgcrypt/cipher/ecc-gost.c b/comm/third_party/libgcrypt/cipher/ecc-gost.c
new file mode 100644
index 0000000000..36230f8a32
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/ecc-gost.c
@@ -0,0 +1,218 @@
+/* ecc-gost.c - Elliptic Curve GOST signatures
+ * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 Dmitry Eremin-Solenikov
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "ecc-common.h"
+#include "pubkey-internal.h"
+
+
+/* Compute a GOST R 34.10-01/-12 signature.
+ * Return the signature struct (r,s) from the message hash. The caller
+ * must have allocated R and S.
+ */
+gpg_err_code_t
+_gcry_ecc_gost_sign (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r, gcry_mpi_t s)
+{
+ gpg_err_code_t rc = 0;
+ gcry_mpi_t k, dr, sum, ke, x, e;
+ mpi_point_struct I;
+ gcry_mpi_t hash;
+ unsigned int qbits;
+
+ if (DBG_CIPHER)
+ log_mpidump ("gost sign hash ", input );
+
+ qbits = mpi_get_nbits (ec->n);
+
+ /* Convert the INPUT into an MPI if needed. */
+ rc = _gcry_dsa_normalize_hash (input, &hash, qbits);
+ if (rc)
+ return rc;
+
+ k = NULL;
+ dr = mpi_alloc (0);
+ sum = mpi_alloc (0);
+ ke = mpi_alloc (0);
+ e = mpi_alloc (0);
+ x = mpi_alloc (0);
+ point_init (&I);
+
+ mpi_mod (e, input, ec->n); /* e = hash mod n */
+
+ if (!mpi_cmp_ui (e, 0))
+ mpi_set_ui (e, 1);
+
+ /* Two loops to avoid R or S being zero.  This is more of a joke than
+ a real demand because the probability of them being zero is less
+ than any hardware failure. Some specs however require it. */
+ do
+ {
+ do
+ {
+ mpi_free (k);
+ k = _gcry_dsa_gen_k (ec->n, GCRY_STRONG_RANDOM);
+
+ _gcry_dsa_modify_k (k, ec->n, qbits);
+
+ _gcry_mpi_ec_mul_point (&I, k, ec->G, ec);
+ if (_gcry_mpi_ec_get_affine (x, NULL, &I, ec))
+ {
+ if (DBG_CIPHER)
+ log_debug ("ecc sign: Failed to get affine coordinates\n");
+ rc = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+ mpi_mod (r, x, ec->n); /* r = x mod n */
+ }
+ while (!mpi_cmp_ui (r, 0));
+ mpi_mulm (dr, ec->d, r, ec->n); /* dr = d*r mod n */
+ mpi_mulm (ke, k, e, ec->n); /* ke = k*e mod n */
+ mpi_addm (s, ke, dr, ec->n); /* s = (k*e + d*r) mod n */
+ }
+ while (!mpi_cmp_ui (s, 0));
+
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("gost sign result r ", r);
+ log_mpidump ("gost sign result s ", s);
+ }
+
+ leave:
+ point_free (&I);
+ mpi_free (x);
+ mpi_free (e);
+ mpi_free (ke);
+ mpi_free (sum);
+ mpi_free (dr);
+ mpi_free (k);
+
+ if (hash != input)
+ mpi_free (hash);
+
+ return rc;
+}
+
+
+/* Verify a GOST R 34.10-01/-12 signature.
+ * Check whether R and S verify INPUT.
+ */
+gpg_err_code_t
+_gcry_ecc_gost_verify (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r, gcry_mpi_t s)
+{
+ gpg_err_code_t err = 0;
+ gcry_mpi_t e, x, z1, z2, v, rv, zero;
+ mpi_point_struct Q, Q1, Q2;
+
+ if (!_gcry_mpi_ec_curve_point (ec->Q, ec))
+ return GPG_ERR_BROKEN_PUBKEY;
+
+ if( !(mpi_cmp_ui (r, 0) > 0 && mpi_cmp (r, ec->n) < 0) )
+ return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < n failed. */
+ if( !(mpi_cmp_ui (s, 0) > 0 && mpi_cmp (s, ec->n) < 0) )
+ return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < n failed. */
+
+ x = mpi_alloc (0);
+ e = mpi_alloc (0);
+ z1 = mpi_alloc (0);
+ z2 = mpi_alloc (0);
+ v = mpi_alloc (0);
+ rv = mpi_alloc (0);
+ zero = mpi_alloc (0);
+
+ point_init (&Q);
+ point_init (&Q1);
+ point_init (&Q2);
+
+ mpi_mod (e, input, ec->n); /* e = hash mod n */
+ if (!mpi_cmp_ui (e, 0))
+ mpi_set_ui (e, 1);
+ mpi_invm (v, e, ec->n); /* v = e^(-1) (mod n) */
+ mpi_mulm (z1, s, v, ec->n); /* z1 = s*v (mod n) */
+ mpi_mulm (rv, r, v, ec->n); /* rv = r*v (mod n) */
+ mpi_subm (z2, zero, rv, ec->n); /* z2 = -r*v (mod n) */
+
+ _gcry_mpi_ec_mul_point (&Q1, z1, ec->G, ec);
+/* log_mpidump ("Q1.x", Q1.x); */
+/* log_mpidump ("Q1.y", Q1.y); */
+/* log_mpidump ("Q1.z", Q1.z); */
+ _gcry_mpi_ec_mul_point (&Q2, z2, ec->Q, ec);
+/* log_mpidump ("Q2.x", Q2.x); */
+/* log_mpidump ("Q2.y", Q2.y); */
+/* log_mpidump ("Q2.z", Q2.z); */
+ _gcry_mpi_ec_add_points (&Q, &Q1, &Q2, ec);
+/* log_mpidump (" Q.x", Q.x); */
+/* log_mpidump (" Q.y", Q.y); */
+/* log_mpidump (" Q.z", Q.z); */
+
+ if (!mpi_cmp_ui (Q.z, 0))
+ {
+ if (DBG_CIPHER)
+ log_debug ("ecc verify: Rejected\n");
+ err = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+ if (_gcry_mpi_ec_get_affine (x, NULL, &Q, ec))
+ {
+ if (DBG_CIPHER)
+ log_debug ("ecc verify: Failed to get affine coordinates\n");
+ err = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+ mpi_mod (x, x, ec->n); /* x = x mod E_n */
+ if (mpi_cmp (x, r)) /* x != r */
+ {
+ if (DBG_CIPHER)
+ {
+ log_mpidump (" x", x);
+ log_mpidump (" r", r);
+ log_mpidump (" s", s);
+ log_debug ("ecc verify: Not verified\n");
+ }
+ err = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+ if (DBG_CIPHER)
+ log_debug ("ecc verify: Accepted\n");
+
+ leave:
+ point_free (&Q2);
+ point_free (&Q1);
+ point_free (&Q);
+ mpi_free (zero);
+ mpi_free (rv);
+ mpi_free (v);
+ mpi_free (z2);
+ mpi_free (z1);
+ mpi_free (x);
+ mpi_free (e);
+ return err;
+}
diff --git a/comm/third_party/libgcrypt/cipher/ecc-misc.c b/comm/third_party/libgcrypt/cipher/ecc-misc.c
new file mode 100644
index 0000000000..6470a83bf4
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/ecc-misc.c
@@ -0,0 +1,438 @@
+/* ecc-misc.c - Elliptic Curve miscellaneous functions
+ * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "ecc-common.h"
+
+
+/*
+ * Release a curve object.
+ */
+void
+_gcry_ecc_curve_free (elliptic_curve_t *E)
+{
+ mpi_free (E->p); E->p = NULL;
+ mpi_free (E->a); E->a = NULL;
+ mpi_free (E->b); E->b = NULL;
+ _gcry_mpi_point_free_parts (&E->G);
+ mpi_free (E->n); E->n = NULL;
+}
+
+
+/*
+ * Return a copy of a curve object.
+ */
+elliptic_curve_t
+_gcry_ecc_curve_copy (elliptic_curve_t E)
+{
+ elliptic_curve_t R;
+
+ R.model = E.model;
+ R.dialect = E.dialect;
+ R.name = E.name;
+ R.p = mpi_copy (E.p);
+ R.a = mpi_copy (E.a);
+ R.b = mpi_copy (E.b);
+ _gcry_mpi_point_init (&R.G);
+ point_set (&R.G, &E.G);
+ R.n = mpi_copy (E.n);
+ R.h = E.h;
+
+ return R;
+}
+
+
+/*
+ * Return a description of the curve model.
+ */
+const char *
+_gcry_ecc_model2str (enum gcry_mpi_ec_models model)
+{
+ const char *str = "?";
+ switch (model)
+ {
+ case MPI_EC_WEIERSTRASS: str = "Weierstrass"; break;
+ case MPI_EC_MONTGOMERY: str = "Montgomery"; break;
+ case MPI_EC_EDWARDS: str = "Edwards"; break;
+ }
+ return str;
+}
+
+
+/*
+ * Return a description of the curve dialect.
+ */
+const char *
+_gcry_ecc_dialect2str (enum ecc_dialects dialect)
+{
+ const char *str = "?";
+ switch (dialect)
+ {
+ case ECC_DIALECT_STANDARD: str = "Standard"; break;
+ case ECC_DIALECT_ED25519: str = "Ed25519"; break;
+ case ECC_DIALECT_SAFECURVE: str = "SafeCurve"; break;
+ }
+ return str;
+}
+
+
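+/* Encode the affine point (X,Y) over the prime field of size P as the
+   SEC1 uncompressed octet string 0x04 || X || Y, with each coordinate
+   zero-padded to the field size, and return it as a new opaque MPI.  */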
+gcry_mpi_t
+_gcry_ecc_ec2os (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t p)
+{
+ gpg_err_code_t rc;
+ int pbytes = (mpi_get_nbits (p)+7)/8;
+ size_t n;
+ unsigned char *buf, *ptr;
+
+ buf = xmalloc ( 1 + 2*pbytes );
+ *buf = 04; /* Uncompressed point. */
+ ptr = buf+1;
+ rc = _gcry_mpi_print (GCRYMPI_FMT_USG, ptr, pbytes, &n, x);
+ if (rc)
+ log_fatal ("mpi_print failed: %s\n", gpg_strerror (rc));
+ if (n < pbytes)
+ {
+ memmove (ptr+(pbytes-n), ptr, n);
+ memset (ptr, 0, (pbytes-n));
+ }
+ ptr += pbytes;
+ rc = _gcry_mpi_print (GCRYMPI_FMT_USG, ptr, pbytes, &n, y);
+ if (rc)
+ log_fatal ("mpi_print failed: %s\n", gpg_strerror (rc));
+ if (n < pbytes)
+ {
+ memmove (ptr+(pbytes-n), ptr, n);
+ memset (ptr, 0, (pbytes-n));
+ }
+
+ return mpi_set_opaque (NULL, buf, (1+2*pbytes)*8);
+}
+
+
+/* Convert POINT into affine coordinates using the context EC and
+ return a newly allocated MPI. If the conversion is not possible
+ NULL is returned. This function won't print an error message. */
+gcry_mpi_t
+_gcry_mpi_ec_ec2os (gcry_mpi_point_t point, mpi_ec_t ec)
+{
+ gcry_mpi_t g_x, g_y, result;
+
+ g_x = mpi_new (0);
+ g_y = mpi_new (0);
+ if (_gcry_mpi_ec_get_affine (g_x, g_y, point, ec))
+ result = NULL;
+ else
+ result = _gcry_ecc_ec2os (g_x, g_y, ec->p);
+ mpi_free (g_x);
+ mpi_free (g_y);
+
+ return result;
+}
+
+
+/* Decode octet string in VALUE into RESULT, in the format defined by SEC 1.
+ RESULT must have been initialized and is set on success to the
+ point given by VALUE. */
+gpg_err_code_t
+_gcry_ecc_sec_decodepoint (gcry_mpi_t value, mpi_ec_t ec, mpi_point_t result)
+{
+ gpg_err_code_t rc;
+ size_t n;
+ const unsigned char *buf;
+ unsigned char *buf_memory;
+ gcry_mpi_t x, y;
+
+ if (mpi_is_opaque (value))
+ {
+ unsigned int nbits;
+
+ buf = mpi_get_opaque (value, &nbits);
+ if (!buf)
+ return GPG_ERR_INV_OBJ;
+ n = (nbits + 7)/8;
+ buf_memory = NULL;
+ }
+ else
+ {
+ n = (mpi_get_nbits (value)+7)/8;
+ buf_memory = xmalloc (n);
+ rc = _gcry_mpi_print (GCRYMPI_FMT_USG, buf_memory, n, &n, value);
+ if (rc)
+ {
+ xfree (buf_memory);
+ return rc;
+ }
+ buf = buf_memory;
+ }
+
+ if (n < 1)
+ {
+ xfree (buf_memory);
+ return GPG_ERR_INV_OBJ;
+ }
+
+ if (*buf == 2 || *buf == 3)
+ {
+ gcry_mpi_t x3;
+ gcry_mpi_t t;
+ gcry_mpi_t p1_4;
+ int y_bit = (*buf == 3);
+
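+      /* The square-root recovery below only works when p = 3 (mod 4),
+         i.e. when bit 1 of P is set; otherwise point compression is
+         not supported.  */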
+ if (!mpi_test_bit (ec->p, 1))
+ {
+ xfree (buf_memory);
+ return GPG_ERR_NOT_IMPLEMENTED; /* No support for point compression. */
+ }
+
+ n = n - 1;
+ rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG, buf+1, n, NULL);
+ xfree (buf_memory);
+ if (rc)
+ return rc;
+
+ /*
+ * Recover Y. The Weierstrass curve: y^2 = x^3 + a*x + b
+ */
+
+ x3 = mpi_new (0);
+ t = mpi_new (0);
+ p1_4 = mpi_new (0);
+ y = mpi_new (0);
+
+ /* Compute right hand side. */
+ mpi_powm (x3, x, mpi_const (MPI_C_THREE), ec->p);
+ mpi_mul (t, ec->a, x);
+ mpi_mod (t, t, ec->p);
+ mpi_add (t, t, ec->b);
+ mpi_mod (t, t, ec->p);
+ mpi_add (t, t, x3);
+ mpi_mod (t, t, ec->p);
+
+ /*
+ * When p mod 4 = 3, modular square root of A can be computed by
+ * A^((p+1)/4) mod p
+ */
+
+ /* Compute (p+1)/4 into p1_4 */
+ mpi_rshift (p1_4, ec->p, 2);
+ _gcry_mpi_add_ui (p1_4, p1_4, 1);
+
+ mpi_powm (y, t, p1_4, ec->p);
+
+ if (y_bit != mpi_test_bit (y, 0))
+ mpi_sub (y, ec->p, y);
+
+ mpi_free (p1_4);
+ mpi_free (t);
+ mpi_free (x3);
+ }
+ else if (*buf == 4)
+ {
+ if ( ((n-1)%2) )
+ {
+ xfree (buf_memory);
+ return GPG_ERR_INV_OBJ;
+ }
+ n = (n-1)/2;
+ rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG, buf+1, n, NULL);
+ if (rc)
+ {
+ xfree (buf_memory);
+ return rc;
+ }
+ rc = _gcry_mpi_scan (&y, GCRYMPI_FMT_USG, buf+1+n, n, NULL);
+ xfree (buf_memory);
+ if (rc)
+ {
+ mpi_free (x);
+ return rc;
+ }
+ }
+ else
+ {
+ xfree (buf_memory);
+ return GPG_ERR_INV_OBJ;
+ }
+
+ mpi_set (result->x, x);
+ mpi_set (result->y, y);
+ mpi_set_ui (result->z, 1);
+
+ mpi_free (x);
+ mpi_free (y);
+
+ return 0;
+}
+
+
+/* Compute the public key from the context EC.  Obviously a
+   requirement is that the secret key is available in EC.  On success
+   Q is returned; on error NULL.  If Q is NULL a newly allocated point
+   is returned. */
+mpi_point_t
+_gcry_ecc_compute_public (mpi_point_t Q, mpi_ec_t ec)
+{
+ if (!ec->d || !ec->G || !ec->p || !ec->a)
+ return NULL;
+ if (ec->model == MPI_EC_EDWARDS && !ec->b)
+ return NULL;
+
+ if ((ec->dialect == ECC_DIALECT_ED25519 && (ec->flags & PUBKEY_FLAG_EDDSA))
+ || (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE))
+ {
+ gcry_mpi_t a;
+ unsigned char *digest;
+
+ if (_gcry_ecc_eddsa_compute_h_d (&digest, ec))
+ return NULL;
+
+ a = mpi_snew (0);
+ _gcry_mpi_set_buffer (a, digest, 32, 0);
+ xfree (digest);
+
+ /* And finally the public key. */
+ if (!Q)
+ Q = mpi_point_new (0);
+ if (Q)
+ _gcry_mpi_ec_mul_point (Q, a, ec->G, ec);
+ mpi_free (a);
+ }
+ else
+ {
+ if (!Q)
+ Q = mpi_point_new (0);
+ if (Q)
+ _gcry_mpi_ec_mul_point (Q, ec->d, ec->G, ec);
+ }
+
+ return Q;
+}
+
+
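+/* Encode the x-coordinate X of an NBITS-bit Montgomery point as an
+   octet string of (NBITS+7)/8 bytes.  If WITH_PREFIX is set, a 0x40
+   prefix byte marking an x-only point is prepended.  The newly
+   allocated buffer is returned in R_BUFFER and its length in
+   R_BUFLEN.  */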
+gpg_err_code_t
+_gcry_ecc_mont_encodepoint (gcry_mpi_t x, unsigned int nbits,
+ int with_prefix,
+ unsigned char **r_buffer, unsigned int *r_buflen)
+{
+ unsigned char *rawmpi;
+ unsigned int rawmpilen;
+
+ rawmpi = _gcry_mpi_get_buffer_extra (x, (nbits+7)/8,
+ with_prefix? -1 : 0, &rawmpilen, NULL);
+ if (rawmpi == NULL)
+ return gpg_err_code_from_syserror ();
+
+ if (with_prefix)
+ {
+ rawmpi[0] = 0x40;
+ rawmpilen++;
+ }
+
+ *r_buffer = rawmpi;
+ *r_buflen = rawmpilen;
+ return 0;
+}
+
+
+gpg_err_code_t
+_gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ec, mpi_point_t result)
+{
+ unsigned char *rawmpi;
+ unsigned int rawmpilen;
+ unsigned int nbytes = (ec->nbits+7)/8;
+
+ /*
+   * It is not reliable to assume that a first byte of 0x40 is
+   * the prefix.
+   *
+   * For newer implementations it is reliable, since we always put
+   * 0x40 for an x-only coordinate.
+   *
+   * For data produced by an older implementation (a non-released
+   * development version in 2015), no 0x40 prefix was added.
+   *
+   * So, the data may be shorter than expected when it was handled
+   * as an MPI, which strips preceding zeros.
+   *
+   * Besides, when the data was parsed as an MPI, it may carry a 0x00
+   * prefix (when the MSB of the first byte is set).
+ */
+
+ if (mpi_is_opaque (pk))
+ {
+ const unsigned char *buf;
+ unsigned char *p;
+
+ buf = mpi_get_opaque (pk, &rawmpilen);
+ if (!buf)
+ return GPG_ERR_INV_OBJ;
+ rawmpilen = (rawmpilen + 7)/8;
+
+ if (rawmpilen > nbytes
+ && (buf[0] == 0x00 || buf[0] == 0x40))
+ {
+ rawmpilen--;
+ buf++;
+ }
+
+ rawmpi = xtrymalloc (nbytes);
+ if (!rawmpi)
+ return gpg_err_code_from_syserror ();
+
+ p = rawmpi + rawmpilen;
+ while (p > rawmpi)
+ *--p = *buf++;
+
+ if (rawmpilen < nbytes)
+ memset (rawmpi + nbytes - rawmpilen, 0, nbytes - rawmpilen);
+ }
+ else
+ {
+ rawmpi = _gcry_mpi_get_buffer (pk, nbytes, &rawmpilen, NULL);
+ if (!rawmpi)
+ return gpg_err_code_from_syserror ();
+ /*
+ * When we have the prefix (0x40 or 0x00), it comes at the end,
+ * since it is taken by _gcry_mpi_get_buffer with little endian.
+ * Just setting RAWMPILEN to NBYTES is enough in this case.
+       * Otherwise, RAWMPILEN is NBYTES already.
+ */
+ rawmpilen = nbytes;
+ }
+
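+  /* Mask off any excess high bits of the most significant byte when
+     the field size is not a multiple of 8; for Curve25519 this clears
+     the top bit as required by RFC 7748.  */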
+ if ((ec->nbits % 8))
+ rawmpi[0] &= (1 << (ec->nbits % 8)) - 1;
+ _gcry_mpi_set_buffer (result->x, rawmpi, rawmpilen, 0);
+ xfree (rawmpi);
+ mpi_set_ui (result->z, 1);
+
+ return 0;
+}
diff --git a/comm/third_party/libgcrypt/cipher/ecc-sm2.c b/comm/third_party/libgcrypt/cipher/ecc-sm2.c
new file mode 100644
index 0000000000..c52629fd3f
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/ecc-sm2.c
@@ -0,0 +1,569 @@
+/* ecc-sm2.c - Elliptic Curve SM2 implementation
+ * Copyright (C) 2020 Tianjia Zhang
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "bithelp.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "pubkey-internal.h"
+#include "ecc-common.h"
+
+#define MPI_NBYTES(m) ((mpi_get_nbits(m) + 7) / 8)
+
+
+/* Key derivation function from X9.63/SECG */
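+/* It produces OUTLEN bytes of output by hashing IN together with a
+   32-bit big-endian counter (1, 2, ...) and concatenating the
+   digests, truncating the final block as needed.  */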
+static gpg_err_code_t
+kdf_x9_63 (int algo, const void *in, size_t inlen, void *out, size_t outlen)
+{
+ gpg_err_code_t rc;
+ gcry_md_hd_t hd;
+ int mdlen;
+ u32 counter = 1;
+ u32 counter_be;
+ unsigned char *dgst;
+ unsigned char *pout = out;
+ size_t rlen = outlen;
+ size_t len;
+
+ rc = _gcry_md_open (&hd, algo, 0);
+ if (rc)
+ return rc;
+
+ mdlen = _gcry_md_get_algo_dlen (algo);
+
+ while (rlen > 0)
+ {
+ counter_be = be_bswap32 (counter); /* cpu_to_be32 */
+ counter++;
+
+ _gcry_md_write (hd, in, inlen);
+ _gcry_md_write (hd, &counter_be, sizeof(counter_be));
+
+ dgst = _gcry_md_read (hd, algo);
+ if (dgst == NULL)
+ {
+ rc = GPG_ERR_DIGEST_ALGO;
+ break;
+ }
+
+ len = mdlen < rlen ? mdlen : rlen; /* min(mdlen, rlen) */
+ memcpy (pout, dgst, len);
+ rlen -= len;
+ pout += len;
+
+ _gcry_md_reset (hd);
+ }
+
+ _gcry_md_close (hd);
+ return rc;
+}
+
+
+/* _gcry_ecc_sm2_encrypt description:
+ * input:
+ * data[0] : octet string
+ * output: A new S-expression with the parameters:
+ * a: c1 : generated ephemeral public key (kG)
+ * b: c3 : Hash(x2 || IN || y2)
+ * c: c2 : cipher
+ *
+ * sm2_decrypt description:
+ * in contrast to encrypt
+ */
+gpg_err_code_t
+_gcry_ecc_sm2_encrypt (gcry_sexp_t *r_ciph, gcry_mpi_t input, mpi_ec_t ec)
+{
+ gpg_err_code_t rc;
+ const int algo = GCRY_MD_SM3;
+ gcry_md_hd_t md = NULL;
+ int mdlen;
+ unsigned char *dgst;
+ gcry_mpi_t k = NULL;
+ mpi_point_struct kG, kP;
+ gcry_mpi_t x1, y1;
+ gcry_mpi_t x2, y2;
+ gcry_mpi_t x2y2 = NULL;
+ unsigned char *in = NULL;
+ unsigned int inlen;
+ unsigned char *raw;
+ unsigned int rawlen;
+ unsigned char *cipher = NULL;
+ int i;
+
+ point_init (&kG);
+ point_init (&kP);
+ x1 = mpi_new (0);
+ y1 = mpi_new (0);
+ x2 = mpi_new (0);
+ y2 = mpi_new (0);
+
+ in = _gcry_mpi_get_buffer (input, 0, &inlen, NULL);
+ if (!in)
+ {
+ rc = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+
+ cipher = xtrymalloc (inlen);
+ if (!cipher)
+ {
+ rc = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+
+ /* rand k in [1, n-1] */
+ k = _gcry_dsa_gen_k (ec->n, GCRY_VERY_STRONG_RANDOM);
+
+ /* [k]G = (x1, y1) */
+ _gcry_mpi_ec_mul_point (&kG, k, ec->G, ec);
+ if (_gcry_mpi_ec_get_affine (x1, y1, &kG, ec))
+ {
+ if (DBG_CIPHER)
+ log_debug ("Bad check: kG can not be a Point at Infinity!\n");
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ /* [k]P = (x2, y2) */
+ _gcry_mpi_ec_mul_point (&kP, k, ec->Q, ec);
+ if (_gcry_mpi_ec_get_affine (x2, y2, &kP, ec))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ /* t = KDF(x2 || y2, klen) */
+ x2y2 = _gcry_mpi_ec_ec2os (&kP, ec);
+ raw = mpi_get_opaque (x2y2, &rawlen);
+ rawlen = (rawlen + 7) / 8;
+
+ /* skip the prefix '0x04' */
+ raw += 1;
+ rawlen -= 1;
+ rc = kdf_x9_63 (algo, raw, rawlen, cipher, inlen);
+ if (rc)
+ goto leave;
+
+ /* cipher = t xor in */
+ for (i = 0; i < inlen; i++)
+ cipher[i] ^= in[i];
+
+ /* hash(x2 || IN || y2) */
+ mdlen = _gcry_md_get_algo_dlen (algo);
+ rc = _gcry_md_open (&md, algo, 0);
+ if (rc)
+ goto leave;
+ _gcry_md_write (md, raw, MPI_NBYTES(x2));
+ _gcry_md_write (md, in, inlen);
+ _gcry_md_write (md, raw + MPI_NBYTES(x2), MPI_NBYTES(y2));
+ dgst = _gcry_md_read (md, algo);
+ if (dgst == NULL)
+ {
+ rc = GPG_ERR_DIGEST_ALGO;
+ goto leave;
+ }
+
+ if (!rc)
+ {
+ gcry_mpi_t c1;
+ gcry_mpi_t c3;
+ gcry_mpi_t c2;
+
+ c3 = mpi_new (0);
+ c2 = mpi_new (0);
+
+ c1 = _gcry_ecc_ec2os (x1, y1, ec->p);
+ _gcry_mpi_set_opaque_copy (c3, dgst, mdlen * 8);
+ _gcry_mpi_set_opaque_copy (c2, cipher, inlen * 8);
+
+ rc = sexp_build (r_ciph, NULL,
+ "(enc-val(flags sm2)(sm2(a%M)(b%M)(c%M)))",
+ c1, c3, c2);
+
+ mpi_free (c1);
+ mpi_free (c3);
+ mpi_free (c2);
+ }
+
+leave:
+ _gcry_md_close (md);
+ mpi_free (x2y2);
+ mpi_free (k);
+
+ point_free (&kG);
+ point_free (&kP);
+ mpi_free (x1);
+ mpi_free (y1);
+ mpi_free (x2);
+ mpi_free (y2);
+
+ xfree (cipher);
+ xfree (in);
+
+ return rc;
+}
+
+
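+/* Decrypt an SM2 ciphertext.  DATA_LIST carries the components C1
+   (the ephemeral point), C3 (the SM3 hash) and C2 (the masked text).
+   The shared point [d]C1 is recomputed, the KDF stream is regenerated
+   to unmask C2, and C3 is verified before the plaintext is returned
+   in R_PLAIN as a (value ...) S-expression.  */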
+gpg_err_code_t
+_gcry_ecc_sm2_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t data_list, mpi_ec_t ec)
+{
+ gpg_err_code_t rc;
+ gcry_mpi_t data_c1 = NULL;
+ gcry_mpi_t data_c3 = NULL;
+ gcry_mpi_t data_c2 = NULL;
+
+ /*
+ * Extract the data.
+ */
+ rc = sexp_extract_param (data_list, NULL, "/a/b/c",
+ &data_c1, &data_c3, &data_c2, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_printmpi ("ecc_decrypt d_c1", data_c1);
+ log_printmpi ("ecc_decrypt d_c3", data_c3);
+ log_printmpi ("ecc_decrypt d_c2", data_c2);
+ }
+
+ {
+ const int algo = GCRY_MD_SM3;
+ gcry_md_hd_t md = NULL;
+ int mdlen;
+ unsigned char *dgst;
+ mpi_point_struct c1;
+ mpi_point_struct kP;
+ gcry_mpi_t x2, y2;
+ gcry_mpi_t x2y2 = NULL;
+ unsigned char *in = NULL;
+ unsigned int inlen;
+ unsigned char *plain = NULL;
+ unsigned char *raw;
+ unsigned int rawlen;
+ unsigned char *c3 = NULL;
+ unsigned int c3_len;
+ int i;
+
+ point_init (&c1);
+ point_init (&kP);
+ x2 = mpi_new (0);
+ y2 = mpi_new (0);
+
+ in = mpi_get_opaque (data_c2, &inlen);
+ inlen = (inlen + 7) / 8;
+ plain = xtrymalloc (inlen);
+ if (!plain)
+ {
+ rc = gpg_err_code_from_syserror ();
+ goto leave_main;
+ }
+
+ rc = _gcry_ecc_sec_decodepoint (data_c1, ec, &c1);
+ if (rc)
+ goto leave_main;
+
+ if (!_gcry_mpi_ec_curve_point (&c1, ec))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave_main;
+ }
+
+ /* [d]C1 = (x2, y2), C1 = [k]G */
+ _gcry_mpi_ec_mul_point (&kP, ec->d, &c1, ec);
+ if (_gcry_mpi_ec_get_affine (x2, y2, &kP, ec))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave_main;
+ }
+
+ /* t = KDF(x2 || y2, inlen) */
+ x2y2 = _gcry_mpi_ec_ec2os (&kP, ec);
+ raw = mpi_get_opaque (x2y2, &rawlen);
+ rawlen = (rawlen + 7) / 8;
+ /* skip the prefix '0x04' */
+ raw += 1;
+ rawlen -= 1;
+ rc = kdf_x9_63 (algo, raw, rawlen, plain, inlen);
+ if (rc)
+ goto leave_main;
+
+ /* plain = C2 xor t */
+ for (i = 0; i < inlen; i++)
+ plain[i] ^= in[i];
+
+ /* Hash(x2 || IN || y2) == C3 */
+ mdlen = _gcry_md_get_algo_dlen (algo);
+ rc = _gcry_md_open (&md, algo, 0);
+ if (rc)
+ goto leave_main;
+ _gcry_md_write (md, raw, MPI_NBYTES(x2));
+ _gcry_md_write (md, plain, inlen);
+ _gcry_md_write (md, raw + MPI_NBYTES(x2), MPI_NBYTES(y2));
+ dgst = _gcry_md_read (md, algo);
+ if (dgst == NULL)
+ {
+ memset (plain, 0, inlen);
+ rc = GPG_ERR_DIGEST_ALGO;
+ goto leave_main;
+ }
+ c3 = mpi_get_opaque (data_c3, &c3_len);
+ c3_len = (c3_len + 7) / 8;
+ if (c3_len != mdlen || memcmp (dgst, c3, c3_len) != 0)
+ {
+ memset (plain, 0, inlen);
+ rc = GPG_ERR_INV_DATA;
+ goto leave_main;
+ }
+
+ if (!rc)
+ {
+ gcry_mpi_t r;
+
+ r = mpi_new (inlen * 8);
+ _gcry_mpi_set_buffer (r, plain, inlen, 0);
+
+ rc = sexp_build (r_plain, NULL, "(value %m)", r);
+
+ mpi_free (r);
+ }
+
+ leave_main:
+ _gcry_md_close (md);
+ mpi_free (x2y2);
+ xfree (plain);
+
+ point_free (&c1);
+ point_free (&kP);
+ mpi_free (x2);
+ mpi_free (y2);
+ }
+
+ leave:
+ _gcry_mpi_release (data_c1);
+ _gcry_mpi_release (data_c3);
+ _gcry_mpi_release (data_c2);
+
+ return rc;
+}
+
+
+/* Compute an SM2 signature.
+ * Return the signature struct (r,s) from the message hash. The caller
+ * must have allocated R and S.
+ */
+gpg_err_code_t
+_gcry_ecc_sm2_sign (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r, gcry_mpi_t s,
+ int flags, int hashalgo)
+{
+ gpg_err_code_t rc = 0;
+ int extraloops = 0;
+ gcry_mpi_t hash;
+ const void *abuf;
+ unsigned int abits, qbits;
+ gcry_mpi_t tmp = NULL;
+ gcry_mpi_t k = NULL;
+ gcry_mpi_t rk = NULL;
+ mpi_point_struct kG;
+ gcry_mpi_t x1;
+
+ if (DBG_CIPHER)
+ log_mpidump ("sm2 sign hash ", input);
+
+ qbits = mpi_get_nbits (ec->n);
+
+ /* Convert the INPUT into an MPI if needed. */
+ rc = _gcry_dsa_normalize_hash (input, &hash, qbits);
+ if (rc)
+ return rc;
+
+ point_init (&kG);
+ x1 = mpi_new (0);
+ rk = mpi_new (0);
+ tmp = mpi_new (0);
+
+ for (;;)
+ {
+ /* rand k in [1, n-1] */
+ if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo)
+ {
+ /* Use Pornin's method for deterministic DSA. If this
+             flag is set, it is expected that HASH is an opaque
+             MPI with the hash to be signed.  That hash is also
+ used as h1 from 3.2.a. */
+ if (!mpi_is_opaque (input))
+ {
+ rc = GPG_ERR_CONFLICT;
+ goto leave;
+ }
+
+ abuf = mpi_get_opaque (input, &abits);
+ rc = _gcry_dsa_gen_rfc6979_k (&k, ec->n, ec->d,
+ abuf, (abits+7)/8,
+ hashalgo, extraloops);
+ if (rc)
+ goto leave;
+ extraloops++;
+ }
+ else
+ k = _gcry_dsa_gen_k (ec->n, GCRY_VERY_STRONG_RANDOM);
+
+ _gcry_dsa_modify_k (k, ec->n, qbits);
+
+ /* [k]G = (x1, y1) */
+ _gcry_mpi_ec_mul_point (&kG, k, ec->G, ec);
+ if (_gcry_mpi_ec_get_affine (x1, NULL, &kG, ec))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ /* r = (e + x1) % n */
+ mpi_addm (r, hash, x1, ec->n);
+
+ /* r != 0 && r + k != n */
+ if (mpi_cmp_ui (r, 0) == 0)
+ continue;
+ mpi_add (rk, r, k);
+ if (mpi_cmp (rk, ec->n) == 0)
+ continue;
+
+ /* s = ((d + 1)^-1 * (k - rd)) % n */
+ mpi_addm (s, ec->d, GCRYMPI_CONST_ONE, ec->n);
+ mpi_invm (s, s, ec->n);
+ mpi_mulm (tmp, r, ec->d, ec->n);
+ mpi_subm (tmp, k, tmp, ec->n);
+ mpi_mulm (s, s, tmp, ec->n);
+
+ /* s != 0 */
+ if (mpi_cmp_ui (s, 0) == 0)
+ continue;
+
+ break; /* Okay */
+ }
+
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("sm2 sign result r ", r);
+ log_mpidump ("sm2 sign result s ", s);
+ }
+
+leave:
+ point_free (&kG);
+ mpi_free (k);
+ mpi_free (x1);
+ mpi_free (rk);
+ mpi_free (tmp);
+
+ if (hash != input)
+ mpi_free (hash);
+
+ return rc;
+}
+
+
+/* Verify an SM2 signature.
+ * Check whether R and S verify INPUT.
+ */
+gpg_err_code_t
+_gcry_ecc_sm2_verify (gcry_mpi_t input, mpi_ec_t ec,
+ gcry_mpi_t r, gcry_mpi_t s)
+{
+ gpg_err_code_t err = 0;
+ gcry_mpi_t hash = NULL;
+ gcry_mpi_t t = NULL;
+ mpi_point_struct sG, tP;
+ gcry_mpi_t x1, y1;
+ unsigned int nbits;
+
+ if (!_gcry_mpi_ec_curve_point (ec->Q, ec))
+ return GPG_ERR_BROKEN_PUBKEY;
+
+ /* r, s within [1, n-1] */
+ if (mpi_cmp_ui (r, 1) < 0 || mpi_cmp (r, ec->n) > 0)
+ return GPG_ERR_BAD_SIGNATURE;
+ if (mpi_cmp_ui (s, 1) < 0 || mpi_cmp (s, ec->n) > 0)
+ return GPG_ERR_BAD_SIGNATURE;
+
+ nbits = mpi_get_nbits (ec->n);
+ err = _gcry_dsa_normalize_hash (input, &hash, nbits);
+ if (err)
+ return err;
+
+ point_init (&sG);
+ point_init (&tP);
+ x1 = mpi_new (0);
+ y1 = mpi_new (0);
+ t = mpi_new (0);
+
+ /* t = (r + s) % n, t != 0 */
+ mpi_addm (t, r, s, ec->n);
+ if (mpi_cmp_ui (t, 0) == 0)
+ {
+ err = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+
+ /* sG + tP = (x1, y1) */
+ _gcry_mpi_ec_mul_point (&sG, s, ec->G, ec);
+ _gcry_mpi_ec_mul_point (&tP, t, ec->Q, ec);
+ _gcry_mpi_ec_add_points (&sG, &sG, &tP, ec);
+ if (_gcry_mpi_ec_get_affine (x1, y1, &sG, ec))
+ {
+ err = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ /* R = (e + x1) % n */
+ mpi_addm (t, hash, x1, ec->n);
+
+ /* check R == r */
+ if (mpi_cmp (t, r))
+ {
+ if (DBG_CIPHER)
+ {
+ log_mpidump (" R", t);
+ log_mpidump (" r", r);
+ log_mpidump (" s", s);
+ }
+ err = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+ if (DBG_CIPHER)
+ log_debug ("sm2 verify: Accepted\n");
+
+ leave:
+ point_free (&sG);
+ point_free (&tP);
+ mpi_free (x1);
+ mpi_free (y1);
+ mpi_free (t);
+ if (hash != input)
+ mpi_free (hash);
+
+ return err;
+}
diff --git a/comm/third_party/libgcrypt/cipher/ecc.c b/comm/third_party/libgcrypt/cipher/ecc.c
new file mode 100644
index 0000000000..5d8c7607ab
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/ecc.c
@@ -0,0 +1,1779 @@
+/* ecc.c - Elliptic Curve Cryptography
+ * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013, 2015 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* This code is originally based on the Patch 0.1.6 for the gnupg
+ 1.4.x branch as retrieved on 2007-03-21 from
+ http://www.calcurco.cat/eccGnuPG/src/gnupg-1.4.6-ecc0.2.0beta1.diff.bz2
+ The original authors are:
+ Written by
+ Sergi Blanch i Torne <d4372211 at alumnes.eup.udl.es>,
+ Ramiro Moreno Chiral <ramiro at eup.udl.es>
+ Maintainers
+ Sergi Blanch i Torne
+ Ramiro Moreno Chiral
+ Mikael Mylnikov (mmr)
+ For use in Libgcrypt the code has been heavily modified and cleaned
+   up. In fact there is not much left of the original code except for
+   some variable names and the textbook implementation of the sign and
+ verification algorithms. The arithmetic functions have entirely
+ been rewritten and moved to mpi/ec.c.
+
+ ECDH encrypt and decrypt code written by Andrey Jivsov.
+*/
+
+
+/* TODO:
+
+ - In mpi/ec.c we use mpi_powm for x^2 mod p: Either implement a
+ special case in mpi_powm or check whether mpi_mulm is faster.
+
+*/
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "pubkey-internal.h"
+#include "ecc-common.h"
+
+
+static const char *ecc_names[] =
+ {
+ "ecc",
+ "ecdsa",
+ "ecdh",
+ "eddsa",
+ "gost",
+ "sm2",
+ NULL,
+ };
+
+
+/* Sample NIST P-256 key from RFC 6979 A.2.5 */
+static const char sample_public_key_secp256[] =
+ "(public-key"
+ " (ecc"
+ " (curve secp256r1)"
+ " (q #04"
+ /**/ "60FED4BA255A9D31C961EB74C6356D68C049B8923B61FA6CE669622E60F29FB6"
+ /**/ "7903FE1008B8BC99A41AE9E95628BC64F2F1B20C2D7E9F5177A3C294D4462299#)))";
+
+static const char sample_secret_key_secp256[] =
+ "(private-key"
+ " (ecc"
+ " (curve secp256r1)"
+ " (d #C9AFA9D845BA75166B5C215767B1D6934E50C3DB36E89B127B8A622B120F6721#)"
+ " (q #04"
+ /**/ "60FED4BA255A9D31C961EB74C6356D68C049B8923B61FA6CE669622E60F29FB6"
+ /**/ "7903FE1008B8BC99A41AE9E95628BC64F2F1B20C2D7E9F5177A3C294D4462299#)))";
+
+
+/* Registered progress function and its callback value. */
+static void (*progress_cb) (void *, const char*, int, int, int);
+static void *progress_cb_data;
+
+
+
+/* Local prototypes. */
+static void test_keys (mpi_ec_t ec, unsigned int nbits);
+static void test_ecdh_only_keys (mpi_ec_t ec, unsigned int nbits, int flags);
+static unsigned int ecc_get_nbits (gcry_sexp_t parms);
+
+
+
+
+void
+_gcry_register_pk_ecc_progress (void (*cb) (void *, const char *,
+ int, int, int),
+ void *cb_data)
+{
+ progress_cb = cb;
+ progress_cb_data = cb_data;
+}
+
+/* static void */
+/* progress (int c) */
+/* { */
+/* if (progress_cb) */
+/* progress_cb (progress_cb_data, "pk_ecc", c, 0, 0); */
+/* } */
+
+
+
+/**
+ * nist_generate_key - Standard version of the ECC key generation.
+ * @ec: Elliptic curve computation context.
+ * @flags: Flags controlling aspects of the creation.
+ * @r_x: On success this receives an allocated MPI with the affine
+ *       x-coordinate of the public key.  On error NULL is stored.
+ * @r_y: Ditto for the y-coordinate.
+ *
+ * Return: An error code.
+ *
+ * The @flags bits used by this function are %PUBKEY_FLAG_TRANSIENT to
+ * use a faster RNG, and %PUBKEY_FLAG_NO_KEYTEST to skip the assertion
+ * that the key works as expected.
+ *
+ * FIXME: Check whether N is needed.
+ */
+static gpg_err_code_t
+nist_generate_key (mpi_ec_t ec, int flags,
+ gcry_mpi_t *r_x, gcry_mpi_t *r_y)
+{
+ mpi_point_struct Q;
+ gcry_random_level_t random_level;
+ gcry_mpi_t x, y;
+ const unsigned int pbits = ec->nbits;
+
+ point_init (&Q);
+
+ if ((flags & PUBKEY_FLAG_TRANSIENT_KEY))
+ random_level = GCRY_STRONG_RANDOM;
+ else
+ random_level = GCRY_VERY_STRONG_RANDOM;
+
+ /* Generate a secret. */
+ if (ec->dialect == ECC_DIALECT_ED25519
+ || ec->dialect == ECC_DIALECT_SAFECURVE
+ || (flags & PUBKEY_FLAG_DJB_TWEAK))
+ {
+ char *rndbuf;
+ int len = (pbits+7)/8;
+
+ rndbuf = _gcry_random_bytes_secure (len, random_level);
+ if (ec->dialect == ECC_DIALECT_SAFECURVE)
+ ec->d = mpi_set_opaque (NULL, rndbuf, len*8);
+ else
+ {
+ ec->d = mpi_snew (pbits);
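+          /* Clamp the random scalar: clear any excess high bits, set
+             the highest valid bit, and clear the low bits covered by
+             the cofactor H (X25519/Ed25519-style clamping).  */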
+ if ((pbits % 8))
+ rndbuf[0] &= (1 << (pbits % 8)) - 1;
+ rndbuf[0] |= (1 << ((pbits + 7) % 8));
+ rndbuf[len-1] &= (256 - ec->h);
+ _gcry_mpi_set_buffer (ec->d, rndbuf, len, 0);
+ xfree (rndbuf);
+ }
+ }
+ else
+ ec->d = _gcry_dsa_gen_k (ec->n, random_level);
+
+ /* Compute Q. */
+ _gcry_mpi_ec_mul_point (&Q, ec->d, ec->G, ec);
+
+ x = mpi_new (pbits);
+ if (r_y == NULL)
+ y = NULL;
+ else
+ y = mpi_new (pbits);
+ if (_gcry_mpi_ec_get_affine (x, y, &Q, ec))
+ log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q");
+
+  /* We want the Q=(x,y) to be a "compliant key" in terms of the
+ * http://tools.ietf.org/html/draft-jivsov-ecc-compact, which simply
+ * means that we choose either Q=(x,y) or -Q=(x,p-y) such that we
+ * end up with the min(y,p-y) as the y coordinate. Such a public
+ * key allows the most efficient compression: y can simply be
+ * dropped because we know that it's a minimum of the two
+ * possibilities without any loss of security. Note that we don't
+ * do that for Ed25519 so that we do not violate the special
+ * construction of the secret key. */
+ if (r_y == NULL || ec->dialect == ECC_DIALECT_ED25519)
+ ec->Q = mpi_point_set (NULL, Q.x, Q.y, Q.z);
+ else
+ {
+ gcry_mpi_t negative;
+
+ negative = mpi_new (pbits);
+
+ if (ec->model == MPI_EC_WEIERSTRASS)
+ mpi_sub (negative, ec->p, y); /* negative = p - y */
+ else
+ mpi_sub (negative, ec->p, x); /* negative = p - x */
+
+      if (mpi_cmp (negative, y) < 0)   /* p - y < y */
+ {
+ /* We need to end up with -Q; this assures that new Q's y is
+ the smallest one */
+ if (ec->model == MPI_EC_WEIERSTRASS)
+ {
+ mpi_free (y);
+ y = negative;
+ }
+ else
+ {
+ mpi_free (x);
+ x = negative;
+ }
+ mpi_sub (ec->d, ec->n, ec->d); /* d = order - d */
+ ec->Q = mpi_point_set (NULL, x, y, mpi_const (MPI_C_ONE));
+
+ if (DBG_CIPHER)
+ log_debug ("ecgen converted Q to a compliant point\n");
+ }
+      else /* p - y >= y */
+ {
+ /* No change is needed exactly 50% of the time: just copy. */
+ mpi_free (negative);
+ ec->Q = mpi_point_set (NULL, Q.x, Q.y, Q.z);
+ if (DBG_CIPHER)
+ log_debug ("ecgen didn't need to convert Q to a compliant point\n");
+ }
+ }
+
+ *r_x = x;
+ if (r_y)
+ *r_y = y;
+
+ point_free (&Q);
+ /* Now we can test our keys (this should never fail!). */
+ if ((flags & PUBKEY_FLAG_NO_KEYTEST))
+ ; /* User requested to skip the test. */
+ else if (ec->model == MPI_EC_MONTGOMERY)
+ test_ecdh_only_keys (ec, ec->nbits - 63, flags);
+ else
+ test_keys (ec, ec->nbits - 64);
+
+ return 0;
+}
+
+
+/*
+ * To verify that the secret key works, sign some random dummy data
+ * and check that the resulting signature verifies.
+ */
+static void
+test_keys (mpi_ec_t ec, unsigned int nbits)
+{
+ gcry_mpi_t test = mpi_new (nbits);
+ mpi_point_struct R_;
+ gcry_mpi_t c = mpi_new (nbits);
+ gcry_mpi_t out = mpi_new (nbits);
+ gcry_mpi_t r = mpi_new (nbits);
+ gcry_mpi_t s = mpi_new (nbits);
+
+ if (DBG_CIPHER)
+ log_debug ("Testing key.\n");
+
+ point_init (&R_);
+
+ _gcry_mpi_randomize (test, nbits, GCRY_WEAK_RANDOM);
+
+ if (_gcry_ecc_ecdsa_sign (test, ec, r, s, 0, 0) )
+ log_fatal ("ECDSA operation: sign failed\n");
+
+ if (_gcry_ecc_ecdsa_verify (test, ec, r, s))
+ {
+ log_fatal ("ECDSA operation: sign, verify failed\n");
+ }
+
+ if (DBG_CIPHER)
+ log_debug ("ECDSA operation: sign, verify ok.\n");
+
+ point_free (&R_);
+ mpi_free (s);
+ mpi_free (r);
+ mpi_free (out);
+ mpi_free (c);
+ mpi_free (test);
+}
+
+
+static void
+test_ecdh_only_keys (mpi_ec_t ec, unsigned int nbits, int flags)
+{
+ gcry_mpi_t test;
+ mpi_point_struct R_;
+ gcry_mpi_t x0, x1;
+
+ if (DBG_CIPHER)
+ log_debug ("Testing ECDH only key.\n");
+
+ point_init (&R_);
+
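+  /* Consistency check: for a random scalar TEST, the points [TEST]Q
+     and [d]([TEST]G), each optionally multiplied by the cofactor h,
+     must yield the same affine x-coordinate.  */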
+ if (ec->dialect == ECC_DIALECT_SAFECURVE || (flags & PUBKEY_FLAG_DJB_TWEAK))
+ {
+ char *rndbuf;
+ const unsigned int pbits = ec->nbits;
+ int len = (pbits+7)/8;
+
+ rndbuf = _gcry_random_bytes (len, GCRY_WEAK_RANDOM);
+ if (ec->dialect == ECC_DIALECT_SAFECURVE)
+ test = mpi_set_opaque (NULL, rndbuf, len*8);
+ else
+ {
+ test = mpi_new (pbits);
+ if ((pbits % 8))
+ rndbuf[0] &= (1 << (pbits % 8)) - 1;
+ rndbuf[0] |= (1 << ((pbits + 7) % 8));
+ rndbuf[len-1] &= (256 - ec->h);
+ _gcry_mpi_set_buffer (test, rndbuf, len, 0);
+ xfree (rndbuf);
+ }
+ }
+ else
+ {
+ test = mpi_new (nbits);
+ _gcry_mpi_randomize (test, nbits, GCRY_WEAK_RANDOM);
+ }
+
+ x0 = mpi_new (0);
+ x1 = mpi_new (0);
+
+ /* R_ = hkQ <=> R_ = hkdG */
+ _gcry_mpi_ec_mul_point (&R_, test, ec->Q, ec);
+ if (ec->dialect == ECC_DIALECT_STANDARD && !(flags & PUBKEY_FLAG_DJB_TWEAK))
+ _gcry_mpi_ec_mul_point (&R_, _gcry_mpi_get_const (ec->h), &R_, ec);
+ if (_gcry_mpi_ec_get_affine (x0, NULL, &R_, ec))
+ log_fatal ("ecdh: Failed to get affine coordinates for hkQ\n");
+
+ _gcry_mpi_ec_mul_point (&R_, test, ec->G, ec);
+ _gcry_mpi_ec_mul_point (&R_, ec->d, &R_, ec);
+ /* R_ = hdkG */
+ if (ec->dialect == ECC_DIALECT_STANDARD && !(flags & PUBKEY_FLAG_DJB_TWEAK))
+ _gcry_mpi_ec_mul_point (&R_, _gcry_mpi_get_const (ec->h), &R_, ec);
+
+ if (_gcry_mpi_ec_get_affine (x1, NULL, &R_, ec))
+ log_fatal ("ecdh: Failed to get affine coordinates for hdkG\n");
+
+ if (mpi_cmp (x0, x1))
+ {
+ log_fatal ("ECDH test failed.\n");
+ }
+
+ mpi_free (x0);
+ mpi_free (x1);
+
+ point_free (&R_);
+ mpi_free (test);
+}
+
+
+/*
+ * To check the validity of the key, recompute the public value from
+ * the secret one and check that the two correspond.
+ */
+static int
+check_secret_key (mpi_ec_t ec, int flags)
+{
+ int rc = 1;
+ mpi_point_struct Q;
+ gcry_mpi_t x1, y1;
+ gcry_mpi_t x2 = NULL;
+ gcry_mpi_t y2 = NULL;
+
+ point_init (&Q);
+ x1 = mpi_new (0);
+ if (ec->model == MPI_EC_MONTGOMERY)
+ y1 = NULL;
+ else
+ y1 = mpi_new (0);
+
+ /* G in E(F_p) */
+ if (!_gcry_mpi_ec_curve_point (ec->G, ec))
+ {
+ if (DBG_CIPHER)
+ log_debug ("Bad check: Point 'G' does not belong to curve 'E'!\n");
+ goto leave;
+ }
+
+ /* G != PaI */
+ if (!mpi_cmp_ui (ec->G->z, 0))
+ {
+ if (DBG_CIPHER)
+ log_debug ("Bad check: 'G' cannot be Point at Infinity!\n");
+ goto leave;
+ }
+
+ /* Check order of curve. */
+ if (ec->dialect == ECC_DIALECT_STANDARD && !(flags & PUBKEY_FLAG_DJB_TWEAK))
+ {
+ _gcry_mpi_ec_mul_point (&Q, ec->n, ec->G, ec);
+ if (mpi_cmp_ui (Q.z, 0))
+ {
+ if (DBG_CIPHER)
+ log_debug ("check_secret_key: E is not a curve of order n\n");
+ goto leave;
+ }
+ }
+
+ /* Pubkey cannot be PaI */
+ if (!mpi_cmp_ui (ec->Q->z, 0))
+ {
+ if (DBG_CIPHER)
+ log_debug ("Bad check: Q can not be a Point at Infinity!\n");
+ goto leave;
+ }
+
+ /* pubkey = [d]G over E */
+ if (!_gcry_ecc_compute_public (&Q, ec))
+ {
+ if (DBG_CIPHER)
+ log_debug ("Bad check: computation of dG failed\n");
+ goto leave;
+ }
+ if (_gcry_mpi_ec_get_affine (x1, y1, &Q, ec))
+ {
+ if (DBG_CIPHER)
+ log_debug ("Bad check: Q can not be a Point at Infinity!\n");
+ goto leave;
+ }
+
+ if ((flags & PUBKEY_FLAG_EDDSA)
+ || (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE))
+ ; /* Fixme: EdDSA is special. */
+ else if (!mpi_cmp_ui (ec->Q->z, 1))
+ {
+ /* Fast path if Q is already in affine coordinates. */
+ if (mpi_cmp (x1, ec->Q->x) || (y1 && mpi_cmp (y1, ec->Q->y)))
+ {
+ if (DBG_CIPHER)
+ log_debug
+ ("Bad check: There is NO correspondence between 'd' and 'Q'!\n");
+ goto leave;
+ }
+ }
+ else
+ {
+ x2 = mpi_new (0);
+ y2 = mpi_new (0);
+ if (_gcry_mpi_ec_get_affine (x2, y2, ec->Q, ec))
+ {
+ if (DBG_CIPHER)
+ log_debug ("Bad check: Q can not be a Point at Infinity!\n");
+ goto leave;
+ }
+
+ if (mpi_cmp (x1, x2) || mpi_cmp (y1, y2))
+ {
+ if (DBG_CIPHER)
+ log_debug
+ ("Bad check: There is NO correspondence between 'd' and 'Q'!\n");
+ goto leave;
+ }
+ }
+ rc = 0; /* Okay. */
+
+ leave:
+ mpi_free (x2);
+ mpi_free (x1);
+ mpi_free (y1);
+ mpi_free (y2);
+ point_free (&Q);
+ return rc;
+}
+
+
+
+/*********************************************
+ ************** interface ******************
+ *********************************************/
+
+static gcry_err_code_t
+ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
+{
+ gpg_err_code_t rc;
+ gcry_mpi_t Gx = NULL;
+ gcry_mpi_t Gy = NULL;
+ gcry_mpi_t Qx = NULL;
+ gcry_mpi_t Qy = NULL;
+ mpi_ec_t ec = NULL;
+ gcry_sexp_t curve_info = NULL;
+ gcry_sexp_t curve_flags = NULL;
+ gcry_mpi_t base = NULL;
+ gcry_mpi_t public = NULL;
+ int flags = 0;
+
+ rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecgen curve", genparms, NULL);
+ if (rc)
+ goto leave;
+
+ if ((flags & PUBKEY_FLAG_EDDSA)
+ || (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE))
+ rc = _gcry_ecc_eddsa_genkey (ec, flags);
+ else if (ec->model == MPI_EC_MONTGOMERY)
+ rc = nist_generate_key (ec, flags, &Qx, NULL);
+ else
+ rc = nist_generate_key (ec, flags, &Qx, &Qy);
+ if (rc)
+ goto leave;
+
+ /* Copy data to the result. */
+ Gx = mpi_new (0);
+ Gy = mpi_new (0);
+ if (ec->model != MPI_EC_MONTGOMERY)
+ {
+ if (_gcry_mpi_ec_get_affine (Gx, Gy, ec->G, ec))
+ log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "G");
+ base = _gcry_ecc_ec2os (Gx, Gy, ec->p);
+ }
+ if (((ec->dialect == ECC_DIALECT_SAFECURVE && ec->model == MPI_EC_EDWARDS)
+ || ec->dialect == ECC_DIALECT_ED25519 || ec->model == MPI_EC_MONTGOMERY)
+ && !(flags & PUBKEY_FLAG_NOCOMP))
+ {
+ unsigned char *encpk;
+ unsigned int encpklen;
+
+ if (ec->model == MPI_EC_MONTGOMERY)
+ rc = _gcry_ecc_mont_encodepoint (Qx, ec->nbits,
+ ec->dialect != ECC_DIALECT_SAFECURVE,
+ &encpk, &encpklen);
+ else
+ /* (Gx and Gy are used as scratch variables) */
+ rc = _gcry_ecc_eddsa_encodepoint (ec->Q, ec, Gx, Gy,
+ (ec->dialect != ECC_DIALECT_SAFECURVE
+ && !!(flags & PUBKEY_FLAG_COMP)),
+ &encpk, &encpklen);
+ if (rc)
+ goto leave;
+ public = mpi_new (0);
+ mpi_set_opaque (public, encpk, encpklen*8);
+ }
+ else
+ {
+ if (!Qx)
+ {
+ /* This is the case for a key from _gcry_ecc_eddsa_generate
+ with no compression. */
+ Qx = mpi_new (0);
+ Qy = mpi_new (0);
+ if (_gcry_mpi_ec_get_affine (Qx, Qy, ec->Q, ec))
+ log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q");
+ }
+ public = _gcry_ecc_ec2os (Qx, Qy, ec->p);
+ }
+ if (ec->name)
+ {
+ rc = sexp_build (&curve_info, NULL, "(curve %s)", ec->name);
+ if (rc)
+ goto leave;
+ }
+
+ if ((flags & PUBKEY_FLAG_PARAM) || (flags & PUBKEY_FLAG_EDDSA)
+ || (flags & PUBKEY_FLAG_DJB_TWEAK))
+ {
+ rc = sexp_build
+ (&curve_flags, NULL,
+ ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_EDDSA))?
+ "(flags param eddsa)" :
+ ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_DJB_TWEAK))?
+ "(flags param djb-tweak)" :
+ ((flags & PUBKEY_FLAG_PARAM))?
+ "(flags param)" : ((flags & PUBKEY_FLAG_EDDSA))?
+ "(flags eddsa)" : "(flags djb-tweak)" );
+ if (rc)
+ goto leave;
+ }
+
+ if ((flags & PUBKEY_FLAG_PARAM) && ec->name)
+ rc = sexp_build (r_skey, NULL,
+ "(key-data"
+ " (public-key"
+ " (ecc%S%S(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)(q%m)))"
+ " (private-key"
+ " (ecc%S%S(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)(q%m)(d%m)))"
+ " )",
+ curve_info, curve_flags,
+ ec->p, ec->a, ec->b, base, ec->n, ec->h, public,
+ curve_info, curve_flags,
+ ec->p, ec->a, ec->b, base, ec->n, ec->h, public,
+ ec->d);
+ else
+ rc = sexp_build (r_skey, NULL,
+ "(key-data"
+ " (public-key"
+ " (ecc%S%S(q%m)))"
+ " (private-key"
+ " (ecc%S%S(q%m)(d%m)))"
+ " )",
+ curve_info, curve_flags,
+ public,
+ curve_info, curve_flags,
+ public, ec->d);
+ if (rc)
+ goto leave;
+
+ if (DBG_CIPHER)
+ {
+ log_printmpi ("ecgen result p", ec->p);
+ log_printmpi ("ecgen result a", ec->a);
+ log_printmpi ("ecgen result b", ec->b);
+ log_printmpi ("ecgen result G", base);
+ log_printmpi ("ecgen result n", ec->n);
+ log_debug ("ecgen result h:+%02x\n", ec->h);
+ log_printmpi ("ecgen result Q", public);
+ log_printmpi ("ecgen result d", ec->d);
+ if ((flags & PUBKEY_FLAG_EDDSA))
+ log_debug ("ecgen result using Ed25519+EdDSA\n");
+ }
+
+ leave:
+ mpi_free (public);
+ mpi_free (base);
+ mpi_free (Gx);
+ mpi_free (Gy);
+ mpi_free (Qx);
+ mpi_free (Qy);
+ _gcry_mpi_ec_free (ec);
+ sexp_release (curve_flags);
+ sexp_release (curve_info);
+ return rc;
+}
+
+
+static gcry_err_code_t
+ecc_check_secret_key (gcry_sexp_t keyparms)
+{
+ gcry_err_code_t rc;
+ int flags = 0;
+ mpi_ec_t ec = NULL;
+
+ /*
+ * Extract the key.
+ */
+ rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_testkey", keyparms, NULL);
+ if (rc)
+ goto leave;
+ if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->Q || !ec->d)
+ {
+ rc = GPG_ERR_NO_OBJ;
+ goto leave;
+ }
+
+ if (check_secret_key (ec, flags))
+ rc = GPG_ERR_BAD_SECKEY;
+
+ leave:
+ _gcry_mpi_ec_free (ec);
+ if (DBG_CIPHER)
+ log_debug ("ecc_testkey => %s\n", gpg_strerror (rc));
+ return rc;
+}
+
+
+static gcry_err_code_t
+ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
+{
+ gcry_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_mpi_t data = NULL;
+ gcry_mpi_t sig_r = NULL;
+ gcry_mpi_t sig_s = NULL;
+ mpi_ec_t ec = NULL;
+ int flags = 0;
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN, 0);
+
+ /*
+ * Extract the key.
+ */
+ rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_sign", keyparms, NULL);
+ if (rc)
+ goto leave;
+ if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->d)
+ {
+ rc = GPG_ERR_NO_OBJ;
+ goto leave;
+ }
+
+ ctx.flags |= flags;
+ if (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE)
+ ctx.flags |= PUBKEY_FLAG_EDDSA;
+ /* Clear hash algo for EdDSA. */
+ if ((ctx.flags & PUBKEY_FLAG_EDDSA))
+ ctx.hash_algo = GCRY_MD_NONE;
+
+ /* Extract the data. */
+ rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_mpidump ("ecc_sign data", data);
+
+ /* Hash algo is determined by curve in EdDSA. Fill it if not specified. */
+ if ((ctx.flags & PUBKEY_FLAG_EDDSA) && !ctx.hash_algo)
+ {
+ if (ec->dialect == ECC_DIALECT_ED25519)
+ ctx.hash_algo = GCRY_MD_SHA512;
+ else if (ec->dialect == ECC_DIALECT_SAFECURVE)
+ ctx.hash_algo = GCRY_MD_SHAKE256;
+ }
+
+ sig_r = mpi_new (0);
+ sig_s = mpi_new (0);
+ if ((ctx.flags & PUBKEY_FLAG_EDDSA))
+ {
+ /* EdDSA requires the public key. */
+ rc = _gcry_ecc_eddsa_sign (data, ec, sig_r, sig_s, &ctx);
+ if (!rc)
+ rc = sexp_build (r_sig, NULL,
+ "(sig-val(eddsa(r%M)(s%M)))", sig_r, sig_s);
+ }
+ else if ((ctx.flags & PUBKEY_FLAG_GOST))
+ {
+ rc = _gcry_ecc_gost_sign (data, ec, sig_r, sig_s);
+ if (!rc)
+ rc = sexp_build (r_sig, NULL,
+ "(sig-val(gost(r%M)(s%M)))", sig_r, sig_s);
+ }
+ else if ((ctx.flags & PUBKEY_FLAG_SM2))
+ {
+ rc = _gcry_ecc_sm2_sign (data, ec, sig_r, sig_s,
+ ctx.flags, ctx.hash_algo);
+ if (!rc)
+ rc = sexp_build (r_sig, NULL,
+ "(sig-val(sm2(r%M)(s%M)))", sig_r, sig_s);
+ }
+ else
+ {
+ rc = _gcry_ecc_ecdsa_sign (data, ec, sig_r, sig_s,
+ ctx.flags, ctx.hash_algo);
+ if (!rc)
+ rc = sexp_build (r_sig, NULL,
+ "(sig-val(ecdsa(r%M)(s%M)))", sig_r, sig_s);
+ }
+
+ leave:
+ _gcry_mpi_release (sig_r);
+ _gcry_mpi_release (sig_s);
+ _gcry_mpi_release (data);
+ _gcry_mpi_ec_free (ec);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("ecc_sign => %s\n", gpg_strerror (rc));
+ return rc;
+}
+
+
+static gcry_err_code_t
+ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms)
+{
+ gcry_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_sexp_t l1 = NULL;
+ gcry_mpi_t sig_r = NULL;
+ gcry_mpi_t sig_s = NULL;
+ gcry_mpi_t data = NULL;
+ int sigflags;
+ mpi_ec_t ec = NULL;
+ int flags = 0;
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY,
+ ecc_get_nbits (s_keyparms));
+
+ /*
+ * Extract the key.
+ */
+ rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_verify",
+ s_keyparms, NULL);
+ if (rc)
+ goto leave;
+ if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->Q)
+ {
+ rc = GPG_ERR_NO_OBJ;
+ goto leave;
+ }
+
+ if (ec->model == MPI_EC_MONTGOMERY)
+ {
+ if (DBG_CIPHER)
+ log_debug ("ecc_verify: Can't use a Montgomery curve\n");
+ rc = GPG_ERR_INTERNAL;
+ goto leave;
+ }
+
+ ctx.flags |= flags;
+ if (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE)
+ ctx.flags |= PUBKEY_FLAG_EDDSA;
+ /* Clear hash algo for EdDSA. */
+ if ((ctx.flags & PUBKEY_FLAG_EDDSA))
+ ctx.hash_algo = GCRY_MD_NONE;
+
+ /* Extract the data. */
+ rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_mpidump ("ecc_verify data", data);
+
+ /* Hash algo is determined by curve in EdDSA. Fill it if not specified. */
+ if ((ctx.flags & PUBKEY_FLAG_EDDSA) && !ctx.hash_algo)
+ {
+ if (ec->dialect == ECC_DIALECT_ED25519)
+ ctx.hash_algo = GCRY_MD_SHA512;
+ else if (ec->dialect == ECC_DIALECT_SAFECURVE)
+ ctx.hash_algo = GCRY_MD_SHAKE256;
+ }
+
+ /*
+ * Extract the signature value.
+ */
+ rc = _gcry_pk_util_preparse_sigval (s_sig, ecc_names, &l1, &sigflags);
+ if (rc)
+ goto leave;
+ rc = sexp_extract_param (l1, NULL, (sigflags & PUBKEY_FLAG_EDDSA)? "/rs":"rs",
+ &sig_r, &sig_s, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("ecc_verify s_r", sig_r);
+ log_mpidump ("ecc_verify s_s", sig_s);
+ }
+ if ((ctx.flags & PUBKEY_FLAG_EDDSA) ^ (sigflags & PUBKEY_FLAG_EDDSA))
+ {
+ rc = GPG_ERR_CONFLICT; /* Inconsistent use of flag/algoname. */
+ goto leave;
+ }
+
+ /*
+ * Verify the signature.
+ */
+ if ((sigflags & PUBKEY_FLAG_EDDSA))
+ {
+ rc = _gcry_ecc_eddsa_verify (data, ec, sig_r, sig_s, &ctx);
+ }
+ else if ((sigflags & PUBKEY_FLAG_GOST))
+ {
+ rc = _gcry_ecc_gost_verify (data, ec, sig_r, sig_s);
+ }
+ else if ((sigflags & PUBKEY_FLAG_SM2))
+ {
+ rc = _gcry_ecc_sm2_verify (data, ec, sig_r, sig_s);
+ }
+ else
+ {
+ rc = _gcry_ecc_ecdsa_verify (data, ec, sig_r, sig_s);
+ }
+
+ leave:
+ _gcry_mpi_release (data);
+ _gcry_mpi_release (sig_r);
+ _gcry_mpi_release (sig_s);
+ _gcry_mpi_ec_free (ec);
+ sexp_release (l1);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("ecc_verify => %s\n", rc?gpg_strerror (rc):"Good");
+ return rc;
+}
+
+
+/* ecdh raw is the classic 2-round DH protocol published in 1976.
+ *
+ * Overview of ecc_encrypt_raw and ecc_decrypt_raw.
+ *
+ * As with any PK operation, the encrypt operation uses the public key
+ * and decrypt uses the private key.
+ *
+ * Symbols used below:
+ * G - field generator point
+ * d - private long-term scalar
+ * dG - public long-term key
+ * k - ephemeral scalar
+ * kG - ephemeral public key
+ * dkG - shared secret
+ *
+ * ecc_encrypt_raw description:
+ * input:
+ * data[0] : private scalar (k)
+ * output: A new S-expression with the parameters:
+ * s : shared point (kdG)
+ * e : generated ephemeral public key (kG)
+ *
+ * ecc_decrypt_raw description:
+ * input:
+ * data[0] : a point kG (ephemeral public key)
+ * output:
+ * result[0] : shared point (kdG)
+ */
+static gcry_err_code_t
+ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
+{
+ unsigned int nbits;
+ gcry_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_mpi_t mpi_s = NULL;
+ gcry_mpi_t mpi_e = NULL;
+ gcry_mpi_t data = NULL;
+ mpi_ec_t ec = NULL;
+ int flags = 0;
+ int no_error_on_infinity;
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT,
+ (nbits = ecc_get_nbits (keyparms)));
+
+ /*
+ * Extract the key.
+ */
+ rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_encrypt", keyparms, NULL);
+ if (rc)
+ goto leave;
+
+ if (ec->dialect == ECC_DIALECT_SAFECURVE)
+ {
+ ctx.flags |= PUBKEY_FLAG_RAW_FLAG;
+ no_error_on_infinity = 1;
+ }
+ else if ((flags & PUBKEY_FLAG_DJB_TWEAK))
+ no_error_on_infinity = 1;
+ else
+ no_error_on_infinity = 0;
+
+ /*
+ * Extract the data.
+ */
+ rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+ if (rc)
+ goto leave;
+
+ /*
+ * Tweak the scalar bits by cofactor and number of bits of the field.
+ * It assumes the cofactor is a power of 2.
+ */
+ if ((flags & PUBKEY_FLAG_DJB_TWEAK))
+ {
+ int i;
+
+ for (i = 0; (ec->h & (1 << i)) == 0; i++)
+ mpi_clear_bit (data, i);
+ mpi_set_highbit (data, ec->nbits - 1);
+ }
+ if (DBG_CIPHER)
+ log_mpidump ("ecc_encrypt data", data);
+
+ if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->Q)
+ {
+ rc = GPG_ERR_NO_OBJ;
+ goto leave;
+ }
+
+ if ((ctx.flags & PUBKEY_FLAG_SM2))
+ {
+ /* All encryption will be done, return it. */
+ rc = _gcry_ecc_sm2_encrypt (r_ciph, data, ec);
+ goto leave;
+ }
+
+ /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so */
+ {
+ mpi_point_struct R; /* Result that we return. */
+ gcry_mpi_t x, y;
+ unsigned char *rawmpi;
+ unsigned int rawmpilen;
+
+ rc = 0;
+ x = mpi_new (0);
+ if (ec->model == MPI_EC_MONTGOMERY)
+ y = NULL;
+ else
+ y = mpi_new (0);
+
+ point_init (&R);
+
+ /* R = kQ <=> R = kdG */
+ _gcry_mpi_ec_mul_point (&R, data, ec->Q, ec);
+
+ if (_gcry_mpi_ec_get_affine (x, y, &R, ec))
+ {
+ /*
+         * Here, X is 0.  In the X25519 computation on Curve25519, the
+         * X0 function maps infinity to zero.  So, when
+         * PUBKEY_FLAG_DJB_TWEAK is enabled, return the result 0 instead
+         * of raising an error.
+         *
+         * This is a corner case.  It never occurs with properly
+         * generated public keys, but it might happen with a blindly
+         * imported public key which might not follow the key
+ * generation procedure.
+ */
+ if (!no_error_on_infinity)
+            { /* This is not the X25519 case, so the input data was simply wrong. */
+ rc = GPG_ERR_INV_DATA;
+ goto leave_main;
+ }
+ }
+ if (y)
+ mpi_s = _gcry_ecc_ec2os (x, y, ec->p);
+ else
+ {
+ rc = _gcry_ecc_mont_encodepoint (x, nbits,
+ ec->dialect != ECC_DIALECT_SAFECURVE,
+ &rawmpi, &rawmpilen);
+ if (rc)
+ goto leave_main;
+ mpi_s = mpi_new (0);
+ mpi_set_opaque (mpi_s, rawmpi, rawmpilen*8);
+ }
+
+ /* R = kG */
+ _gcry_mpi_ec_mul_point (&R, data, ec->G, ec);
+
+ if (_gcry_mpi_ec_get_affine (x, y, &R, ec))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave_main;
+ }
+ if (y)
+ mpi_e = _gcry_ecc_ec2os (x, y, ec->p);
+ else
+ {
+ rc = _gcry_ecc_mont_encodepoint (x, nbits,
+ ec->dialect != ECC_DIALECT_SAFECURVE,
+ &rawmpi, &rawmpilen);
+ if (!rc)
+ {
+ mpi_e = mpi_new (0);
+ mpi_set_opaque (mpi_e, rawmpi, rawmpilen*8);
+ }
+ }
+
+ leave_main:
+ mpi_free (x);
+ mpi_free (y);
+ point_free (&R);
+ if (rc)
+ goto leave;
+ }
+
+ if (!rc)
+ rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e);
+
+ leave:
+ _gcry_mpi_release (data);
+ _gcry_mpi_release (mpi_s);
+ _gcry_mpi_release (mpi_e);
+ _gcry_mpi_ec_free (ec);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("ecc_encrypt => %s\n", gpg_strerror (rc));
+ return rc;
+}
+
+
+/* input:
+ * data[0] : a point kG (ephemeral public key)
+ * output:
+ * resaddr[0] : shared point kdG
+ *
+ * see ecc_encrypt_raw for details.
+ */
+static gcry_err_code_t
+ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
+{
+ unsigned int nbits;
+ gpg_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_sexp_t l1 = NULL;
+ gcry_mpi_t data_e = NULL;
+ mpi_ec_t ec = NULL;
+ mpi_point_struct kG;
+ mpi_point_struct R;
+ gcry_mpi_t r = NULL;
+ int flags = 0;
+ int enable_specific_point_validation;
+
+ point_init (&kG);
+ point_init (&R);
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT,
+ (nbits = ecc_get_nbits (keyparms)));
+
+ /*
+ * Extract the key.
+ */
+ rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_decrypt", keyparms, NULL);
+ if (rc)
+ goto leave;
+
+ if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->d)
+ {
+ rc = GPG_ERR_NO_OBJ;
+ goto leave;
+ }
+
+ /*
+ * Extract the data.
+ */
+ rc = _gcry_pk_util_preparse_encval (s_data, ecc_names, &l1, &ctx);
+ if (rc)
+ goto leave;
+ if ((ctx.flags & PUBKEY_FLAG_SM2))
+ {
+ /* All decryption will be done, return it. */
+ rc = _gcry_ecc_sm2_decrypt (r_plain, l1, ec);
+ goto leave;
+ }
+ else
+ {
+ rc = sexp_extract_param (l1, NULL, "/e", &data_e, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_printmpi ("ecc_decrypt d_e", data_e);
+ }
+
+ if (ec->dialect == ECC_DIALECT_SAFECURVE || (flags & PUBKEY_FLAG_DJB_TWEAK))
+ enable_specific_point_validation = 1;
+ else
+ enable_specific_point_validation = 0;
+
+ /*
+ * Compute the plaintext.
+ */
+ if (ec->model == MPI_EC_MONTGOMERY)
+ rc = _gcry_ecc_mont_decodepoint (data_e, ec, &kG);
+ else
+ rc = _gcry_ecc_sec_decodepoint (data_e, ec, &kG);
+ if (rc)
+ goto leave;
+
+ if (DBG_CIPHER)
+ log_printpnt ("ecc_decrypt kG", &kG, NULL);
+
+ if (enable_specific_point_validation)
+ {
+      /* For X25519, by its definition, input validation should not
+       * be done (instead, we check the output).
+       *
+       * However, to mitigate a secret key leak from our implementation,
+       * we also do input validation here.  For a constant-time
+       * implementation, this input validation can be removed.
+ */
+ if (_gcry_mpi_ec_bad_point (&kG, ec))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+ }
+ else if (!_gcry_mpi_ec_curve_point (&kG, ec))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ /* R = dkG */
+ _gcry_mpi_ec_mul_point (&R, ec->d, &kG, ec);
+
+ /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so: */
+ {
+ gcry_mpi_t x, y;
+
+ x = mpi_new (0);
+ if (ec->model == MPI_EC_MONTGOMERY)
+ y = NULL;
+ else
+ y = mpi_new (0);
+
+ if (_gcry_mpi_ec_get_affine (x, y, &R, ec))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ /*
+ * Note for X25519.
+ *
+ * By the definition of X25519, this is the case where X25519
+ * returns 0, mapping infinity to zero. However, we
+ * deliberately let it return an error.
+ *
+           * For X25519 ECDH, coming here means that the message might
+           * be decrypted by anyone with the shared secret of 0 (the
+           * result of this function can be made 0 by scalar values
+           * other than the private key D).
+           *
+           * So, it looks like an encrypted message but it can be
+           * decrypted by anyone, or at least something has gone
+           * wrong.  The recipient should not proceed as if it were a
+           * properly encrypted message.
+ *
+ * This handling is needed for our major usage of GnuPG,
+ * where it does the One-Pass Diffie-Hellman method,
+ * C(1, 1, ECC CDH), with an ephemeral key.
+ */
+ }
+
+ if (y)
+ r = _gcry_ecc_ec2os (x, y, ec->p);
+ else
+ {
+
+ unsigned char *rawmpi;
+ unsigned int rawmpilen;
+
+ rc = _gcry_ecc_mont_encodepoint (x, nbits,
+ ec->dialect != ECC_DIALECT_SAFECURVE,
+ &rawmpi, &rawmpilen);
+ if (rc)
+ goto leave;
+
+ r = mpi_new (0);
+ mpi_set_opaque (r, rawmpi, rawmpilen*8);
+ }
+ if (!r)
+ rc = gpg_err_code_from_syserror ();
+ else
+ rc = 0;
+ mpi_free (x);
+ mpi_free (y);
+ }
+ if (DBG_CIPHER)
+ log_printmpi ("ecc_decrypt res", r);
+
+ if (!rc)
+ rc = sexp_build (r_plain, NULL, "(value %m)", r);
+
+ leave:
+ point_free (&R);
+ point_free (&kG);
+ _gcry_mpi_release (r);
+ _gcry_mpi_release (data_e);
+ sexp_release (l1);
+ _gcry_mpi_ec_free (ec);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("ecc_decrypt => %s\n", gpg_strerror (rc));
+ return rc;
+}
+
+
+/* Return the number of bits for the key described by PARMS. On error
+ * 0 is returned. The format of PARMS starts with the algorithm name;
+ * for example:
+ *
+ * (ecc
+ * (curve <name>)
+ * (p <mpi>)
+ * (a <mpi>)
+ * (b <mpi>)
+ * (g <mpi>)
+ * (n <mpi>)
+ * (q <mpi>))
+ *
+ * More parameters may be given. Either P or CURVE is needed.
+ */
+static unsigned int
+ecc_get_nbits (gcry_sexp_t parms)
+{
+ gcry_sexp_t l1;
+ gcry_mpi_t p;
+ unsigned int nbits = 0;
+ char *curve;
+
+ l1 = sexp_find_token (parms, "p", 1);
+ if (!l1)
+ { /* Parameter P not found - check whether we have "curve". */
+ l1 = sexp_find_token (parms, "curve", 5);
+ if (!l1)
+ return 0; /* Neither P nor CURVE found. */
+
+ curve = sexp_nth_string (l1, 1);
+ sexp_release (l1);
+ if (!curve)
+ return 0; /* No curve name given (or out of core). */
+
+ if (_gcry_ecc_fill_in_curve (0, curve, NULL, &nbits))
+ nbits = 0;
+ xfree (curve);
+ }
+ else
+ {
+ p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+ sexp_release (l1);
+ if (p)
+ {
+ nbits = mpi_get_nbits (p);
+ _gcry_mpi_release (p);
+ }
+ }
+ return nbits;
+}
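
A usage sketch, for illustration only: the bit size computed by ecc_get_nbits is what the public gcry_pk_get_nbits call reports once a key s-expression has been built.  The curve name below is just an example, and qbuf/qlen are assumed placeholders for an already encoded public point.

    #include <gcrypt.h>

    /* Sketch: query the key size of an ECC key given by curve name. */
    gcry_sexp_t key;
    unsigned int nbits = 0;

    if (!gcry_sexp_build (&key, NULL,
                          "(public-key (ecc (curve %s) (q %b)))",
                          "NIST P-256", (int)qlen, qbuf))
      {
        nbits = gcry_pk_get_nbits (key);   /* 256 for NIST P-256 */
        gcry_sexp_release (key);
      }
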
+
+
+/* See rsa.c for a description of this function. */
+static gpg_err_code_t
+compute_keygrip (gcry_md_hd_t md, gcry_sexp_t keyparms)
+{
+#define N_COMPONENTS 6
+ static const char names[N_COMPONENTS] = "pabgnq";
+ gpg_err_code_t rc;
+ gcry_sexp_t l1;
+ gcry_mpi_t values[N_COMPONENTS];
+ int idx;
+ char *curvename = NULL;
+ int flags = 0;
+ enum gcry_mpi_ec_models model = 0;
+ enum ecc_dialects dialect = 0;
+ const unsigned char *raw;
+ unsigned int n;
+
+ /* Clear the values first. */
+ for (idx=0; idx < N_COMPONENTS; idx++)
+ values[idx] = NULL;
+
+
+ /* Look for flags. */
+ l1 = sexp_find_token (keyparms, "flags", 0);
+ if (l1)
+ {
+ rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL);
+ if (rc)
+ goto leave;
+ }
+
+ /* Extract the parameters. */
+ if ((flags & PUBKEY_FLAG_PARAM))
+ rc = sexp_extract_param (keyparms, NULL, "p?a?b?g?n?/q",
+ &values[0], &values[1], &values[2],
+ &values[3], &values[4], &values[5],
+ NULL);
+ else
+ rc = sexp_extract_param (keyparms, NULL, "/q", &values[5], NULL);
+ if (rc)
+ goto leave;
+
+ /* Check whether a curve parameter is available and use that to fill
+ in missing values. */
+ sexp_release (l1);
+ l1 = sexp_find_token (keyparms, "curve", 5);
+ if (l1)
+ {
+ curvename = sexp_nth_string (l1, 1);
+ if (curvename)
+ {
+ rc = _gcry_ecc_update_curve_param (curvename,
+ &model, &dialect,
+ &values[0], &values[1], &values[2],
+ &values[3], &values[4]);
+ if (rc)
+ goto leave;
+ }
+ }
+
+ /* Guess required fields if a curve parameter has not been given.
+     FIXME: This is a crude hack.  We need to fix that.  */
+ if (!curvename)
+ {
+ model = ((flags & PUBKEY_FLAG_EDDSA)
+ ? MPI_EC_EDWARDS
+ : MPI_EC_WEIERSTRASS);
+ dialect = ((flags & PUBKEY_FLAG_EDDSA)
+ ? ECC_DIALECT_ED25519
+ : ECC_DIALECT_STANDARD);
+ }
+
+ /* Check that all parameters are known and normalize all MPIs (that
+ should not be required but we use an internal function later and
+ thus we better make 100% sure that they are normalized). */
+ for (idx = 0; idx < N_COMPONENTS; idx++)
+ if (!values[idx])
+ {
+ rc = GPG_ERR_NO_OBJ;
+ goto leave;
+ }
+ else
+ _gcry_mpi_normalize (values[idx]);
+
+ /* Uncompress the public key with the exception of EdDSA where
+ compression is the default and we thus compute the keygrip using
+ the compressed version. Because we don't support any non-eddsa
+ compression, the only thing we need to do is to compress
+ EdDSA. */
+ if ((flags & PUBKEY_FLAG_EDDSA) && dialect == ECC_DIALECT_ED25519)
+ {
+ const unsigned int pbits = mpi_get_nbits (values[0]);
+
+ rc = _gcry_ecc_eddsa_ensure_compact (values[5], pbits);
+ if (rc)
+ goto leave;
+ }
+ else if ((flags & PUBKEY_FLAG_DJB_TWEAK))
+ {
+ /* Remove the prefix 0x40 for keygrip computation. */
+ raw = mpi_get_opaque (values[5], &n);
+ if (raw)
+ {
+ n = (n + 7)/8;
+
+ if (n > 1 && (n%2) && raw[0] == 0x40)
+ if (!_gcry_mpi_set_opaque_copy (values[5], raw + 1, (n - 1)*8))
+ rc = gpg_err_code_from_syserror ();
+ }
+ else
+ {
+ rc = GPG_ERR_INV_OBJ;
+ goto leave;
+ }
+ }
+
+ /* Hash them all. */
+ for (idx = 0; idx < N_COMPONENTS; idx++)
+ {
+ char buf[30];
+
+ if (mpi_is_opaque (values[idx]))
+ {
+ raw = mpi_get_opaque (values[idx], &n);
+ n = (n + 7)/8;
+ snprintf (buf, sizeof buf, "(1:%c%u:", names[idx], n);
+ _gcry_md_write (md, buf, strlen (buf));
+ _gcry_md_write (md, raw, n);
+ _gcry_md_write (md, ")", 1);
+ }
+ else
+ {
+ unsigned char *rawmpi;
+ unsigned int rawmpilen;
+
+ rawmpi = _gcry_mpi_get_buffer (values[idx], 0, &rawmpilen, NULL);
+ if (!rawmpi)
+ {
+ rc = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+ snprintf (buf, sizeof buf, "(1:%c%u:", names[idx], rawmpilen);
+ _gcry_md_write (md, buf, strlen (buf));
+ _gcry_md_write (md, rawmpi, rawmpilen);
+ _gcry_md_write (md, ")", 1);
+ xfree (rawmpi);
+ }
+ }
+
+ leave:
+ xfree (curvename);
+ sexp_release (l1);
+ for (idx = 0; idx < N_COMPONENTS; idx++)
+ _gcry_mpi_release (values[idx]);
+
+ return rc;
+#undef N_COMPONENTS
+}
+
+
+
+/*
+ Low-level API helper functions.
+ */
+
+/* This is the worker function for gcry_pubkey_get_sexp for ECC
+ algorithms. Note that the caller has already stored NULL at
+ R_SEXP. */
+gpg_err_code_t
+_gcry_pk_ecc_get_sexp (gcry_sexp_t *r_sexp, int mode, mpi_ec_t ec)
+{
+ gpg_err_code_t rc;
+ gcry_mpi_t mpi_G = NULL;
+ gcry_mpi_t mpi_Q = NULL;
+
+ if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n)
+ return GPG_ERR_BAD_CRYPT_CTX;
+
+ if (mode == GCRY_PK_GET_SECKEY && !ec->d)
+ return GPG_ERR_NO_SECKEY;
+
+ /* Compute the public point if it is missing. */
+ if (!ec->Q && ec->d)
+ ec->Q = _gcry_ecc_compute_public (NULL, ec);
+
+ /* Encode G and Q. */
+ mpi_G = _gcry_mpi_ec_ec2os (ec->G, ec);
+ if (!mpi_G)
+ {
+ rc = GPG_ERR_BROKEN_PUBKEY;
+ goto leave;
+ }
+ if (!ec->Q)
+ {
+ rc = GPG_ERR_BAD_CRYPT_CTX;
+ goto leave;
+ }
+
+ if (ec->dialect == ECC_DIALECT_ED25519)
+ {
+ unsigned char *encpk;
+ unsigned int encpklen;
+
+ rc = _gcry_ecc_eddsa_encodepoint (ec->Q, ec, NULL, NULL, 0,
+ &encpk, &encpklen);
+ if (rc)
+ goto leave;
+ mpi_Q = mpi_set_opaque (NULL, encpk, encpklen*8);
+ encpk = NULL;
+ }
+ else if (ec->model == MPI_EC_MONTGOMERY)
+ {
+ unsigned char *encpk;
+ unsigned int encpklen;
+
+ rc = _gcry_ecc_mont_encodepoint (ec->Q->x, ec->nbits,
+ ec->dialect != ECC_DIALECT_SAFECURVE,
+ &encpk, &encpklen);
+ if (rc)
+ goto leave;
+ mpi_Q = mpi_set_opaque (NULL, encpk, encpklen*8);
+ }
+ else
+ {
+ mpi_Q = _gcry_mpi_ec_ec2os (ec->Q, ec);
+ }
+ if (!mpi_Q)
+ {
+ rc = GPG_ERR_BROKEN_PUBKEY;
+ goto leave;
+ }
+
+ /* Fixme: We should return a curve name instead of the parameters if
+     we know that they match a curve.  */
+
+ if (ec->d && (!mode || mode == GCRY_PK_GET_SECKEY))
+ {
+ /* Let's return a private key. */
+ rc = sexp_build (r_sexp, NULL,
+ "(private-key(ecc(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)(q%m)(d%m)))",
+ ec->p, ec->a, ec->b, mpi_G, ec->n, ec->h, mpi_Q, ec->d);
+ }
+ else if (ec->Q)
+ {
+ /* Let's return a public key. */
+ rc = sexp_build (r_sexp, NULL,
+ "(public-key(ecc(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)(q%m)))",
+ ec->p, ec->a, ec->b, mpi_G, ec->n, ec->h, mpi_Q);
+ }
+ else
+ rc = GPG_ERR_BAD_CRYPT_CTX;
+
+ leave:
+ mpi_free (mpi_Q);
+ mpi_free (mpi_G);
+ return rc;
+}
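
For orientation, a hedged sketch of how this worker is reached from the public API: gcry_pubkey_get_sexp operates on a context created with gcry_mpi_ec_new.  Here keyparms is assumed to be an existing ECC key s-expression and error handling is kept minimal.

    #include <gcrypt.h>

    gcry_ctx_t ctx = NULL;
    gcry_sexp_t pub = NULL;

    if (!gcry_mpi_ec_new (&ctx, keyparms, NULL))
      {
        if (!gcry_pubkey_get_sexp (&pub, GCRY_PK_GET_PUBKEY, ctx))
          {
            /* pub now holds "(public-key (ecc (p ...) ... (q ...)))". */
            gcry_sexp_release (pub);
          }
        gcry_ctx_release (ctx);
      }
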
+
+
+
+/*
+ Self-test section.
+ */
+
+static const char *
+selftest_sign (gcry_sexp_t pkey, gcry_sexp_t skey)
+{
+ /* Sample data from RFC 6979 section A.2.5, hash is of message "sample" */
+ static const char sample_data[] =
+ "(data (flags rfc6979)"
+ " (hash sha256 #af2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e98915"
+ /**/ "62113d8a62add1bf#))";
+ static const char sample_data_bad[] =
+ "(data (flags rfc6979)"
+ " (hash sha256 #bf2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e98915"
+ /**/ "62113d8a62add1bf#))";
+ static const char signature_r[] =
+ "efd48b2aacb6a8fd1140dd9cd45e81d69d2c877b56aaf991c34d0ea84eaf3716";
+ static const char signature_s[] =
+ "f7cb1c942d657c41d436c7a1b6e29f65f3e900dbb9aff4064dc4ab2f843acda8";
+
+ const char *errtxt = NULL;
+ gcry_error_t err;
+ gcry_sexp_t data = NULL;
+ gcry_sexp_t data_bad = NULL;
+ gcry_sexp_t sig = NULL;
+ gcry_sexp_t l1 = NULL;
+ gcry_sexp_t l2 = NULL;
+ gcry_mpi_t r = NULL;
+ gcry_mpi_t s = NULL;
+ gcry_mpi_t calculated_r = NULL;
+ gcry_mpi_t calculated_s = NULL;
+ int cmp;
+
+ err = sexp_sscan (&data, NULL, sample_data, strlen (sample_data));
+ if (!err)
+ err = sexp_sscan (&data_bad, NULL,
+ sample_data_bad, strlen (sample_data_bad));
+ if (!err)
+ err = _gcry_mpi_scan (&r, GCRYMPI_FMT_HEX, signature_r, 0, NULL);
+ if (!err)
+ err = _gcry_mpi_scan (&s, GCRYMPI_FMT_HEX, signature_s, 0, NULL);
+
+ if (err)
+ {
+ errtxt = "converting data failed";
+ goto leave;
+ }
+
+ err = _gcry_pk_sign (&sig, data, skey);
+ if (err)
+ {
+ errtxt = "signing failed";
+ goto leave;
+ }
+
+ /* check against known signature */
+ errtxt = "signature validity failed";
+ l1 = _gcry_sexp_find_token (sig, "sig-val", 0);
+ if (!l1)
+ goto leave;
+ l2 = _gcry_sexp_find_token (l1, "ecdsa", 0);
+ if (!l2)
+ goto leave;
+
+ sexp_release (l1);
+ l1 = l2;
+
+ l2 = _gcry_sexp_find_token (l1, "r", 0);
+ if (!l2)
+ goto leave;
+ calculated_r = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG);
+ if (!calculated_r)
+ goto leave;
+
+ sexp_release (l2);
+ l2 = _gcry_sexp_find_token (l1, "s", 0);
+ if (!l2)
+ goto leave;
+ calculated_s = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG);
+ if (!calculated_s)
+ goto leave;
+
+ errtxt = "known sig check failed";
+
+ cmp = _gcry_mpi_cmp (r, calculated_r);
+ if (cmp)
+ goto leave;
+ cmp = _gcry_mpi_cmp (s, calculated_s);
+ if (cmp)
+ goto leave;
+
+ errtxt = NULL;
+
+ /* verify generated signature */
+ err = _gcry_pk_verify (sig, data, pkey);
+ if (err)
+ {
+ errtxt = "verify failed";
+ goto leave;
+ }
+ err = _gcry_pk_verify (sig, data_bad, pkey);
+ if (gcry_err_code (err) != GPG_ERR_BAD_SIGNATURE)
+ {
+ errtxt = "bad signature not detected";
+ goto leave;
+ }
+
+
+ leave:
+ sexp_release (sig);
+ sexp_release (data_bad);
+ sexp_release (data);
+ sexp_release (l1);
+ sexp_release (l2);
+ mpi_release (r);
+ mpi_release (s);
+ mpi_release (calculated_r);
+ mpi_release (calculated_s);
+ return errtxt;
+}
+
+
+static gpg_err_code_t
+selftests_ecdsa (selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+ gcry_error_t err;
+ gcry_sexp_t skey = NULL;
+ gcry_sexp_t pkey = NULL;
+
+ what = "convert";
+ err = sexp_sscan (&skey, NULL, sample_secret_key_secp256,
+ strlen (sample_secret_key_secp256));
+ if (!err)
+ err = sexp_sscan (&pkey, NULL, sample_public_key_secp256,
+ strlen (sample_public_key_secp256));
+ if (err)
+ {
+ errtxt = _gcry_strerror (err);
+ goto failed;
+ }
+
+ what = "key consistency";
+ err = ecc_check_secret_key(skey);
+ if (err)
+ {
+ errtxt = _gcry_strerror (err);
+ goto failed;
+ }
+
+ what = "sign";
+ errtxt = selftest_sign (pkey, skey);
+ if (errtxt)
+ goto failed;
+
+ sexp_release(pkey);
+ sexp_release(skey);
+ return 0; /* Succeeded. */
+
+ failed:
+ sexp_release(pkey);
+ sexp_release(skey);
+ if (report)
+ report ("pubkey", GCRY_PK_ECC, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Run a full self-test for ALGO and return 0 on success. */
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+ (void)extended;
+
+ if (algo != GCRY_PK_ECC)
+ return GPG_ERR_PUBKEY_ALGO;
+
+ return selftests_ecdsa (report);
+}
+
+
+
+
+gcry_pk_spec_t _gcry_pubkey_spec_ecc =
+ {
+ GCRY_PK_ECC, { 0, 1 },
+ (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR),
+ "ECC", ecc_names,
+ "pabgnhq", "pabgnhqd", "se", "rs", "pabgnhq",
+ ecc_generate,
+ ecc_check_secret_key,
+ ecc_encrypt_raw,
+ ecc_decrypt_raw,
+ ecc_sign,
+ ecc_verify,
+ ecc_get_nbits,
+ run_selftests,
+ compute_keygrip,
+ _gcry_ecc_get_curve,
+ _gcry_ecc_get_param_sexp
+ };
diff --git a/comm/third_party/libgcrypt/cipher/elgamal.c b/comm/third_party/libgcrypt/cipher/elgamal.c
new file mode 100644
index 0000000000..4eb52d620b
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/elgamal.c
@@ -0,0 +1,1149 @@
+/* Elgamal.c - Elgamal Public Key encryption
+ * Copyright (C) 1998, 2000, 2001, 2002, 2003,
+ * 2008 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * For a description of the algorithm, see:
+ * Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996.
+ * ISBN 0-471-11709-9. Pages 476 ff.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+/* Blinding is used to mitigate side-channel attacks. You may undef
+ this to speed up the operation in case the system is secured
+   against physical and network-mounted side-channel attacks. */
+#define USE_BLINDING 1
+
+
+typedef struct
+{
+ gcry_mpi_t p; /* prime */
+ gcry_mpi_t g; /* group generator */
+ gcry_mpi_t y; /* g^x mod p */
+} ELG_public_key;
+
+
+typedef struct
+{
+ gcry_mpi_t p; /* prime */
+ gcry_mpi_t g; /* group generator */
+ gcry_mpi_t y; /* g^x mod p */
+ gcry_mpi_t x; /* secret exponent */
+} ELG_secret_key;
+
+
+static const char *elg_names[] =
+ {
+ "elg",
+ "openpgp-elg",
+ "openpgp-elg-sig",
+ NULL,
+ };
+
+
+static int test_keys (ELG_secret_key *sk, unsigned int nbits, int nodie);
+static gcry_mpi_t gen_k (gcry_mpi_t p, int small_k);
+static gcry_err_code_t generate (ELG_secret_key *sk, unsigned nbits,
+ gcry_mpi_t **factors);
+static int check_secret_key (ELG_secret_key *sk);
+static void do_encrypt (gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input,
+ ELG_public_key *pkey);
+static void decrypt (gcry_mpi_t output, gcry_mpi_t a, gcry_mpi_t b,
+ ELG_secret_key *skey);
+static void sign (gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input,
+ ELG_secret_key *skey);
+static int verify (gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input,
+ ELG_public_key *pkey);
+static unsigned int elg_get_nbits (gcry_sexp_t parms);
+
+
+static void (*progress_cb) (void *, const char *, int, int, int);
+static void *progress_cb_data;
+
+void
+_gcry_register_pk_elg_progress (void (*cb) (void *, const char *,
+ int, int, int),
+ void *cb_data)
+{
+ progress_cb = cb;
+ progress_cb_data = cb_data;
+}
+
+
+static void
+progress (int c)
+{
+ if (progress_cb)
+ progress_cb (progress_cb_data, "pk_elg", c, 0, 0);
+}
+
+
+/****************
+ * Michael Wiener's table on subgroup sizes to match field sizes.
+ * (floating around somewhere, probably based on the paper from
+ * Eurocrypt 96, page 332)
+ */
+static unsigned int
+wiener_map( unsigned int n )
+{
+ static struct { unsigned int p_n, q_n; } t[] =
+ { /* p q attack cost */
+ { 512, 119 }, /* 9 x 10^17 */
+ { 768, 145 }, /* 6 x 10^21 */
+ { 1024, 165 }, /* 7 x 10^24 */
+ { 1280, 183 }, /* 3 x 10^27 */
+ { 1536, 198 }, /* 7 x 10^29 */
+ { 1792, 212 }, /* 9 x 10^31 */
+ { 2048, 225 }, /* 8 x 10^33 */
+ { 2304, 237 }, /* 5 x 10^35 */
+ { 2560, 249 }, /* 3 x 10^37 */
+ { 2816, 259 }, /* 1 x 10^39 */
+ { 3072, 269 }, /* 3 x 10^40 */
+ { 3328, 279 }, /* 8 x 10^41 */
+ { 3584, 288 }, /* 2 x 10^43 */
+ { 3840, 296 }, /* 4 x 10^44 */
+ { 4096, 305 }, /* 7 x 10^45 */
+ { 4352, 313 }, /* 1 x 10^47 */
+ { 4608, 320 }, /* 2 x 10^48 */
+ { 4864, 328 }, /* 2 x 10^49 */
+ { 5120, 335 }, /* 3 x 10^50 */
+ { 0, 0 }
+ };
+ int i;
+
+ for(i=0; t[i].p_n; i++ )
+ {
+ if( n <= t[i].p_n )
+ return t[i].q_n;
+ }
+  /* Not in table - use an arbitrarily high number. */
+ return n / 8 + 200;
+}
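
To make the table concrete (an illustrative calculation, not library code): for a 2048-bit prime the table gives a 225-bit subgroup, so gen_k with SMALL_K set picks an exponent of roughly 225 * 3 / 2 = 337 bits instead of 2048, and generate() sizes the secret exponent x similarly (after rounding the subgroup size up to an even 226, x gets about 339 bits).
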
+
+static int
+test_keys ( ELG_secret_key *sk, unsigned int nbits, int nodie )
+{
+ ELG_public_key pk;
+ gcry_mpi_t test = mpi_new ( 0 );
+ gcry_mpi_t out1_a = mpi_new ( nbits );
+ gcry_mpi_t out1_b = mpi_new ( nbits );
+ gcry_mpi_t out2 = mpi_new ( nbits );
+ int failed = 0;
+
+ pk.p = sk->p;
+ pk.g = sk->g;
+ pk.y = sk->y;
+
+ _gcry_mpi_randomize ( test, nbits, GCRY_WEAK_RANDOM );
+
+ do_encrypt ( out1_a, out1_b, test, &pk );
+ decrypt ( out2, out1_a, out1_b, sk );
+ if ( mpi_cmp( test, out2 ) )
+ failed |= 1;
+
+ sign ( out1_a, out1_b, test, sk );
+ if ( !verify( out1_a, out1_b, test, &pk ) )
+ failed |= 2;
+
+ _gcry_mpi_release ( test );
+ _gcry_mpi_release ( out1_a );
+ _gcry_mpi_release ( out1_b );
+ _gcry_mpi_release ( out2 );
+
+ if (failed && !nodie)
+ log_fatal ("Elgamal test key for %s %s failed\n",
+ (failed & 1)? "encrypt+decrypt":"",
+ (failed & 2)? "sign+verify":"");
+ if (failed && DBG_CIPHER)
+ log_debug ("Elgamal test key for %s %s failed\n",
+ (failed & 1)? "encrypt+decrypt":"",
+ (failed & 2)? "sign+verify":"");
+
+ return failed;
+}
+
+
+/****************
+ * Generate a random secret exponent k from prime p, so that k is
+ * relatively prime to p-1. With SMALL_K set, k will be selected for
+ * better encryption performance - this must never be used for signing!
+ */
+static gcry_mpi_t
+gen_k( gcry_mpi_t p, int small_k )
+{
+ gcry_mpi_t k = mpi_alloc_secure( 0 );
+ gcry_mpi_t temp = mpi_alloc( mpi_get_nlimbs(p) );
+ gcry_mpi_t p_1 = mpi_copy(p);
+ unsigned int orig_nbits = mpi_get_nbits(p);
+ unsigned int nbits, nbytes;
+ char *rndbuf = NULL;
+
+ if (small_k)
+ {
+      /* Using a k much smaller than p is sufficient for encryption and
+ * it greatly improves the encryption performance. We use
+ * Wiener's table and add a large safety margin. */
+ nbits = wiener_map( orig_nbits ) * 3 / 2;
+ if( nbits >= orig_nbits )
+ BUG();
+ }
+ else
+ nbits = orig_nbits;
+
+
+ nbytes = (nbits+7)/8;
+ if( DBG_CIPHER )
+ log_debug("choosing a random k\n");
+ mpi_sub_ui( p_1, p, 1);
+ for(;;)
+ {
+ if( !rndbuf || nbits < 32 )
+ {
+ xfree(rndbuf);
+ rndbuf = _gcry_random_bytes_secure( nbytes, GCRY_STRONG_RANDOM );
+ }
+ else
+ {
+          /* Change only some of the higher bits.  We could improve
+             this by directly requesting more memory at the first call
+             to get_random_bytes() and reusing it here; maybe it is
+             easier to do this directly in random.c.  Anyway, it is
+             highly unlikely that we will ever reach this code. */
+ char *pp = _gcry_random_bytes_secure( 4, GCRY_STRONG_RANDOM );
+ memcpy( rndbuf, pp, 4 );
+ xfree(pp);
+ }
+ _gcry_mpi_set_buffer( k, rndbuf, nbytes, 0 );
+
+ for(;;)
+ {
+ if( !(mpi_cmp( k, p_1 ) < 0) ) /* check: k < (p-1) */
+ {
+ if( DBG_CIPHER )
+ progress('+');
+ break; /* no */
+ }
+ if( !(mpi_cmp_ui( k, 0 ) > 0) ) /* check: k > 0 */
+ {
+ if( DBG_CIPHER )
+ progress('-');
+ break; /* no */
+ }
+ if (mpi_gcd( temp, k, p_1 ))
+            goto found; /* okay, k is relatively prime to (p-1) */
+ mpi_add_ui( k, k, 1 );
+ if( DBG_CIPHER )
+ progress('.');
+ }
+ }
+ found:
+ xfree (rndbuf);
+ if( DBG_CIPHER )
+ progress('\n');
+ mpi_free(p_1);
+ mpi_free(temp);
+
+ return k;
+}
+
+/****************
+ * Generate a key pair with a key of size NBITS
+ * Returns: 2 structures filled with all needed values
+ * and an array with n-1 factors of (p-1)
+ */
+static gcry_err_code_t
+generate ( ELG_secret_key *sk, unsigned int nbits, gcry_mpi_t **ret_factors )
+{
+ gcry_err_code_t rc;
+ gcry_mpi_t p; /* the prime */
+ gcry_mpi_t p_min1;
+ gcry_mpi_t g;
+ gcry_mpi_t x; /* the secret exponent */
+ gcry_mpi_t y;
+ unsigned int qbits;
+ unsigned int xbits;
+ byte *rndbuf;
+
+ p_min1 = mpi_new ( nbits );
+ qbits = wiener_map( nbits );
+  if( qbits & 1 ) /* better have an even one */
+ qbits++;
+ g = mpi_alloc(1);
+ rc = _gcry_generate_elg_prime (0, nbits, qbits, g, &p, ret_factors);
+ if (rc)
+ {
+ mpi_free (p_min1);
+ mpi_free (g);
+ return rc;
+ }
+ mpi_sub_ui(p_min1, p, 1);
+
+
+ /* Select a random number which has these properties:
+ * 0 < x < p-1
+ * This must be a very good random number because this is the
+ * secret part. The prime is public and may be shared anyway,
+ * so a random generator level of 1 is used for the prime.
+ *
+   * I don't see a reason to have an x of about the same size
+   * as p.  It should be sufficient to have one about the size
+   * of q or of the later used k, plus a large safety margin.  Decryption
+ * will be much faster with such an x.
+ */
+ xbits = qbits * 3 / 2;
+ if( xbits >= nbits )
+ BUG();
+ x = mpi_snew ( xbits );
+ if( DBG_CIPHER )
+ log_debug("choosing a random x of size %u\n", xbits );
+ rndbuf = NULL;
+ do
+ {
+ if( DBG_CIPHER )
+ progress('.');
+ if( rndbuf )
+ { /* Change only some of the higher bits */
+ if( xbits < 16 ) /* should never happen ... */
+ {
+ xfree(rndbuf);
+ rndbuf = _gcry_random_bytes_secure ((xbits+7)/8,
+ GCRY_VERY_STRONG_RANDOM);
+ }
+ else
+ {
+ char *r = _gcry_random_bytes_secure (2, GCRY_VERY_STRONG_RANDOM);
+ memcpy(rndbuf, r, 2 );
+ xfree (r);
+ }
+ }
+ else
+ {
+ rndbuf = _gcry_random_bytes_secure ((xbits+7)/8,
+ GCRY_VERY_STRONG_RANDOM );
+ }
+ _gcry_mpi_set_buffer( x, rndbuf, (xbits+7)/8, 0 );
+ mpi_clear_highbit( x, xbits+1 );
+ }
+ while( !( mpi_cmp_ui( x, 0 )>0 && mpi_cmp( x, p_min1 )<0 ) );
+ xfree(rndbuf);
+
+ y = mpi_new (nbits);
+ mpi_powm( y, g, x, p );
+
+ if( DBG_CIPHER )
+ {
+ progress ('\n');
+ log_mpidump ("elg p", p );
+ log_mpidump ("elg g", g );
+ log_mpidump ("elg y", y );
+ log_mpidump ("elg x", x );
+ }
+
+ /* Copy the stuff to the key structures */
+ sk->p = p;
+ sk->g = g;
+ sk->y = y;
+ sk->x = x;
+
+ _gcry_mpi_release ( p_min1 );
+
+ /* Now we can test our keys (this should never fail!) */
+ test_keys ( sk, nbits - 64, 0 );
+
+ return 0;
+}
+
+
+/* Generate a key pair with a key of size NBITS not using a random
+ value for the secret key but the one given as X. This is useful to
+   implement passphrase-based decryption for public-key-based
+   encryption.  It has applications in backup systems.
+
+ Returns: A structure filled with all needed values and an array
+ with n-1 factors of (p-1). */
+static gcry_err_code_t
+generate_using_x (ELG_secret_key *sk, unsigned int nbits, gcry_mpi_t x,
+ gcry_mpi_t **ret_factors )
+{
+ gcry_err_code_t rc;
+ gcry_mpi_t p; /* The prime. */
+ gcry_mpi_t p_min1; /* The prime minus 1. */
+ gcry_mpi_t g; /* The generator. */
+ gcry_mpi_t y; /* g^x mod p. */
+ unsigned int qbits;
+ unsigned int xbits;
+
+ sk->p = NULL;
+ sk->g = NULL;
+ sk->y = NULL;
+ sk->x = NULL;
+
+ /* Do a quick check to see whether X is suitable. */
+ xbits = mpi_get_nbits (x);
+ if ( xbits < 64 || xbits >= nbits )
+ return GPG_ERR_INV_VALUE;
+
+ p_min1 = mpi_new ( nbits );
+ qbits = wiener_map ( nbits );
+ if ( (qbits & 1) ) /* Better have an even one. */
+ qbits++;
+ g = mpi_alloc (1);
+ rc = _gcry_generate_elg_prime (0, nbits, qbits, g, &p, ret_factors );
+ if (rc)
+ {
+ mpi_free (p_min1);
+ mpi_free (g);
+ return rc;
+ }
+ mpi_sub_ui (p_min1, p, 1);
+
+ if (DBG_CIPHER)
+ log_debug ("using a supplied x of size %u", xbits );
+ if ( !(mpi_cmp_ui ( x, 0 ) > 0 && mpi_cmp ( x, p_min1 ) <0 ) )
+ {
+ _gcry_mpi_release ( p_min1 );
+ _gcry_mpi_release ( p );
+ _gcry_mpi_release ( g );
+ return GPG_ERR_INV_VALUE;
+ }
+
+ y = mpi_new (nbits);
+ mpi_powm ( y, g, x, p );
+
+ if ( DBG_CIPHER )
+ {
+ progress ('\n');
+ log_mpidump ("elg p", p );
+ log_mpidump ("elg g", g );
+ log_mpidump ("elg y", y );
+ log_mpidump ("elg x", x );
+ }
+
+ /* Copy the stuff to the key structures */
+ sk->p = p;
+ sk->g = g;
+ sk->y = y;
+ sk->x = mpi_copy (x);
+
+ _gcry_mpi_release ( p_min1 );
+
+ /* Now we can test our keys. */
+ if ( test_keys ( sk, nbits - 64, 1 ) )
+ {
+ _gcry_mpi_release ( sk->p ); sk->p = NULL;
+ _gcry_mpi_release ( sk->g ); sk->g = NULL;
+ _gcry_mpi_release ( sk->y ); sk->y = NULL;
+ _gcry_mpi_release ( sk->x ); sk->x = NULL;
+ return GPG_ERR_BAD_SECKEY;
+ }
+
+ return 0;
+}
+
+
+/****************
+ * Test whether the secret key is valid.
+ * Returns: true if this is a valid key.
+ */
+static int
+check_secret_key( ELG_secret_key *sk )
+{
+ int rc;
+ gcry_mpi_t y = mpi_alloc( mpi_get_nlimbs(sk->y) );
+
+ mpi_powm (y, sk->g, sk->x, sk->p);
+ rc = !mpi_cmp( y, sk->y );
+ mpi_free( y );
+ return rc;
+}
+
+
+static void
+do_encrypt(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, ELG_public_key *pkey )
+{
+ gcry_mpi_t k;
+
+ /* Note: maybe we should change the interface, so that it
+ * is possible to check that input is < p and return an
+ * error code.
+ */
+
+ k = gen_k( pkey->p, 1 );
+ mpi_powm (a, pkey->g, k, pkey->p);
+
+ /* b = (y^k * input) mod p
+ * = ((y^k mod p) * (input mod p)) mod p
+ * and because input is < p
+ * = ((y^k mod p) * input) mod p
+ */
+ mpi_powm (b, pkey->y, k, pkey->p);
+ mpi_mulm (b, b, input, pkey->p);
+#if 0
+ if( DBG_CIPHER )
+ {
+ log_mpidump("elg encrypted y", pkey->y);
+ log_mpidump("elg encrypted p", pkey->p);
+ log_mpidump("elg encrypted k", k);
+ log_mpidump("elg encrypted M", input);
+ log_mpidump("elg encrypted a", a);
+ log_mpidump("elg encrypted b", b);
+ }
+#endif
+ mpi_free(k);
+}
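
A toy walk-through of the equations above (illustration only; real parameters are thousands of bits): with p = 23, g = 5 and secret x = 6, the public key is y = g^x mod p = 8.  Encrypting m = 7 with ephemeral k = 3 gives a = g^k mod p = 10 and b = y^k * m mod p = 6 * 7 mod 23 = 19.  Decryption computes a^x mod p = 6, its inverse 4 (since 6 * 4 = 24 = 1 mod 23), and recovers b * 4 mod 23 = 7 = m.
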
+
+
+
+
+static void
+decrypt (gcry_mpi_t output, gcry_mpi_t a, gcry_mpi_t b, ELG_secret_key *skey )
+{
+ gcry_mpi_t t1, t2, r;
+ unsigned int nbits = mpi_get_nbits (skey->p);
+
+ mpi_normalize (a);
+ mpi_normalize (b);
+
+ t1 = mpi_snew (nbits);
+
+#ifdef USE_BLINDING
+
+ t2 = mpi_snew (nbits);
+ r = mpi_new (nbits);
+
+ /* We need a random number of about the prime size. The random
+ number merely needs to be unpredictable; thus we use level 0. */
+ _gcry_mpi_randomize (r, nbits, GCRY_WEAK_RANDOM);
+
+ /* t1 = r^x mod p */
+ mpi_powm (t1, r, skey->x, skey->p);
+ /* t2 = (a * r)^-x mod p */
+ mpi_mulm (t2, a, r, skey->p);
+ mpi_powm (t2, t2, skey->x, skey->p);
+ mpi_invm (t2, t2, skey->p);
+ /* t1 = (t1 * t2) mod p*/
+ mpi_mulm (t1, t1, t2, skey->p);
+
+ mpi_free (r);
+ mpi_free (t2);
+
+#else /*!USE_BLINDING*/
+
+ /* output = b/(a^x) mod p */
+ mpi_powm (t1, a, skey->x, skey->p);
+ mpi_invm (t1, t1, skey->p);
+
+#endif /*!USE_BLINDING*/
+
+ mpi_mulm (output, b, t1, skey->p);
+
+#if 0
+ if( DBG_CIPHER )
+ {
+ log_mpidump ("elg decrypted x", skey->x);
+ log_mpidump ("elg decrypted p", skey->p);
+ log_mpidump ("elg decrypted a", a);
+ log_mpidump ("elg decrypted b", b);
+ log_mpidump ("elg decrypted M", output);
+ }
+#endif
+ mpi_free (t1);
+}
+
+
+/****************
+ * Make an Elgamal signature out of INPUT
+ */
+
+static void
+sign(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, ELG_secret_key *skey )
+{
+ gcry_mpi_t k;
+ gcry_mpi_t t = mpi_alloc( mpi_get_nlimbs(a) );
+ gcry_mpi_t inv = mpi_alloc( mpi_get_nlimbs(a) );
+ gcry_mpi_t p_1 = mpi_copy(skey->p);
+
+  /*
+   * b = (t * inv) mod (p-1)
+   * b = (t * inv(k, p-1)) mod (p-1)
+   * b = (((M - x*a) mod (p-1)) * inv(k, p-1)) mod (p-1)
+   *
+   * where inv(k, p-1) is the multiplicative inverse of k modulo p-1.
+   */
+ mpi_sub_ui(p_1, p_1, 1);
+ k = gen_k( skey->p, 0 /* no small K ! */ );
+ mpi_powm( a, skey->g, k, skey->p );
+ mpi_mul(t, skey->x, a );
+ mpi_subm(t, input, t, p_1 );
+ mpi_invm(inv, k, p_1 );
+ mpi_mulm(b, t, inv, p_1 );
+
+#if 0
+ if( DBG_CIPHER )
+ {
+ log_mpidump ("elg sign p", skey->p);
+ log_mpidump ("elg sign g", skey->g);
+ log_mpidump ("elg sign y", skey->y);
+ log_mpidump ("elg sign x", skey->x);
+ log_mpidump ("elg sign k", k);
+ log_mpidump ("elg sign M", input);
+ log_mpidump ("elg sign a", a);
+ log_mpidump ("elg sign b", b);
+ }
+#endif
+ mpi_free(k);
+ mpi_free(t);
+ mpi_free(inv);
+ mpi_free(p_1);
+}
+
+
+/****************
+ * Returns true if the signature composed of A and B is valid.
+ */
+static int
+verify(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, ELG_public_key *pkey )
+{
+ int rc;
+ gcry_mpi_t t1;
+ gcry_mpi_t t2;
+ gcry_mpi_t base[4];
+ gcry_mpi_t ex[4];
+
+ if( !(mpi_cmp_ui( a, 0 ) > 0 && mpi_cmp( a, pkey->p ) < 0) )
+ return 0; /* assertion 0 < a < p failed */
+
+ t1 = mpi_alloc( mpi_get_nlimbs(a) );
+ t2 = mpi_alloc( mpi_get_nlimbs(a) );
+
+#if 0
+ /* t1 = (y^a mod p) * (a^b mod p) mod p */
+ gcry_mpi_powm( t1, pkey->y, a, pkey->p );
+ gcry_mpi_powm( t2, a, b, pkey->p );
+ mpi_mulm( t1, t1, t2, pkey->p );
+
+ /* t2 = g ^ input mod p */
+ gcry_mpi_powm( t2, pkey->g, input, pkey->p );
+
+ rc = !mpi_cmp( t1, t2 );
+#elif 0
+ /* t1 = (y^a mod p) * (a^b mod p) mod p */
+ base[0] = pkey->y; ex[0] = a;
+ base[1] = a; ex[1] = b;
+ base[2] = NULL; ex[2] = NULL;
+ mpi_mulpowm( t1, base, ex, pkey->p );
+
+ /* t2 = g ^ input mod p */
+ gcry_mpi_powm( t2, pkey->g, input, pkey->p );
+
+ rc = !mpi_cmp( t1, t2 );
+#else
+ /* t1 = g ^ - input * y ^ a * a ^ b mod p */
+ mpi_invm(t2, pkey->g, pkey->p );
+ base[0] = t2 ; ex[0] = input;
+ base[1] = pkey->y; ex[1] = a;
+ base[2] = a; ex[2] = b;
+ base[3] = NULL; ex[3] = NULL;
+ mpi_mulpowm( t1, base, ex, pkey->p );
+ rc = !mpi_cmp_ui( t1, 1 );
+
+#endif
+
+ mpi_free(t1);
+ mpi_free(t2);
+ return rc;
+}
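
Why the default branch works (a short derivation, not in the source): the signature satisfies b*k = M - x*a (mod p-1), hence a^b = g^(k*b) = g^(M - x*a) (mod p), and therefore g^(-M) * y^a * a^b = g^(-M) * g^(x*a) * g^(M - x*a) = 1 (mod p), which is exactly the mpi_mulpowm product compared against 1.  The #if 0 branches check the equivalent classic form y^a * a^b = g^M (mod p).
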
+
+/*********************************************
+ ************** interface ******************
+ *********************************************/
+
+static gpg_err_code_t
+elg_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
+{
+ gpg_err_code_t rc;
+ unsigned int nbits;
+ ELG_secret_key sk;
+ gcry_mpi_t xvalue = NULL;
+ gcry_sexp_t l1;
+ gcry_mpi_t *factors = NULL;
+ gcry_sexp_t misc_info = NULL;
+
+ memset (&sk, 0, sizeof sk);
+
+ rc = _gcry_pk_util_get_nbits (genparms, &nbits);
+ if (rc)
+ return rc;
+
+ /* Parse the optional xvalue element. */
+ l1 = sexp_find_token (genparms, "xvalue", 0);
+ if (l1)
+ {
+ xvalue = sexp_nth_mpi (l1, 1, 0);
+ sexp_release (l1);
+ if (!xvalue)
+ return GPG_ERR_BAD_MPI;
+ }
+
+ if (xvalue)
+ {
+ rc = generate_using_x (&sk, nbits, xvalue, &factors);
+ mpi_free (xvalue);
+ }
+ else
+ {
+ rc = generate (&sk, nbits, &factors);
+ }
+ if (rc)
+ goto leave;
+
+ if (factors && factors[0])
+ {
+ int nfac;
+ void **arg_list;
+ char *buffer, *p;
+
+ for (nfac = 0; factors[nfac]; nfac++)
+ ;
+ arg_list = xtrycalloc (nfac+1, sizeof *arg_list);
+ if (!arg_list)
+ {
+ rc = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+ buffer = xtrymalloc (30 + nfac*2 + 2 + 1);
+ if (!buffer)
+ {
+ rc = gpg_err_code_from_syserror ();
+ xfree (arg_list);
+ goto leave;
+ }
+ p = stpcpy (buffer, "(misc-key-info(pm1-factors");
+ for(nfac = 0; factors[nfac]; nfac++)
+ {
+ p = stpcpy (p, "%m");
+ arg_list[nfac] = factors + nfac;
+ }
+ p = stpcpy (p, "))");
+ rc = sexp_build_array (&misc_info, NULL, buffer, arg_list);
+ xfree (arg_list);
+ xfree (buffer);
+ if (rc)
+ goto leave;
+ }
+
+ rc = sexp_build (r_skey, NULL,
+ "(key-data"
+ " (public-key"
+ " (elg(p%m)(g%m)(y%m)))"
+ " (private-key"
+ " (elg(p%m)(g%m)(y%m)(x%m)))"
+ " %S)",
+ sk.p, sk.g, sk.y,
+ sk.p, sk.g, sk.y, sk.x,
+ misc_info);
+
+ leave:
+ mpi_free (sk.p);
+ mpi_free (sk.g);
+ mpi_free (sk.y);
+ mpi_free (sk.x);
+ sexp_release (misc_info);
+ if (factors)
+ {
+ gcry_mpi_t *mp;
+ for (mp = factors; *mp; mp++)
+ mpi_free (*mp);
+ xfree (factors);
+ }
+
+ return rc;
+}
+
+
+static gcry_err_code_t
+elg_check_secret_key (gcry_sexp_t keyparms)
+{
+ gcry_err_code_t rc;
+ ELG_secret_key sk = {NULL, NULL, NULL, NULL};
+
+ rc = sexp_extract_param (keyparms, NULL, "pgyx",
+ &sk.p, &sk.g, &sk.y, &sk.x,
+ NULL);
+ if (rc)
+ goto leave;
+
+ if (!check_secret_key (&sk))
+ rc = GPG_ERR_BAD_SECKEY;
+
+ leave:
+ _gcry_mpi_release (sk.p);
+ _gcry_mpi_release (sk.g);
+ _gcry_mpi_release (sk.y);
+ _gcry_mpi_release (sk.x);
+ if (DBG_CIPHER)
+ log_debug ("elg_testkey => %s\n", gpg_strerror (rc));
+ return rc;
+}
+
+
+static gcry_err_code_t
+elg_encrypt (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
+{
+ gcry_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_mpi_t mpi_a = NULL;
+ gcry_mpi_t mpi_b = NULL;
+ gcry_mpi_t data = NULL;
+ ELG_public_key pk = { NULL, NULL, NULL };
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT,
+ elg_get_nbits (keyparms));
+
+ /* Extract the data. */
+ rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_mpidump ("elg_encrypt data", data);
+ if (mpi_is_opaque (data))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ /* Extract the key. */
+ rc = sexp_extract_param (keyparms, NULL, "pgy",
+ &pk.p, &pk.g, &pk.y, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("elg_encrypt p", pk.p);
+ log_mpidump ("elg_encrypt g", pk.g);
+ log_mpidump ("elg_encrypt y", pk.y);
+ }
+
+ /* Do Elgamal computation and build result. */
+ mpi_a = mpi_new (0);
+ mpi_b = mpi_new (0);
+ do_encrypt (mpi_a, mpi_b, data, &pk);
+ rc = sexp_build (r_ciph, NULL, "(enc-val(elg(a%m)(b%m)))", mpi_a, mpi_b);
+
+ leave:
+ _gcry_mpi_release (mpi_a);
+ _gcry_mpi_release (mpi_b);
+ _gcry_mpi_release (pk.p);
+ _gcry_mpi_release (pk.g);
+ _gcry_mpi_release (pk.y);
+ _gcry_mpi_release (data);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("elg_encrypt => %s\n", gpg_strerror (rc));
+ return rc;
+}
+
+
+static gcry_err_code_t
+elg_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
+{
+ gpg_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_sexp_t l1 = NULL;
+ gcry_mpi_t data_a = NULL;
+ gcry_mpi_t data_b = NULL;
+ ELG_secret_key sk = {NULL, NULL, NULL, NULL};
+ gcry_mpi_t plain = NULL;
+ unsigned char *unpad = NULL;
+ size_t unpadlen = 0;
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT,
+ elg_get_nbits (keyparms));
+
+ /* Extract the data. */
+ rc = _gcry_pk_util_preparse_encval (s_data, elg_names, &l1, &ctx);
+ if (rc)
+ goto leave;
+ rc = sexp_extract_param (l1, NULL, "ab", &data_a, &data_b, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_printmpi ("elg_decrypt d_a", data_a);
+ log_printmpi ("elg_decrypt d_b", data_b);
+ }
+ if (mpi_is_opaque (data_a) || mpi_is_opaque (data_b))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ /* Extract the key. */
+ rc = sexp_extract_param (keyparms, NULL, "pgyx",
+ &sk.p, &sk.g, &sk.y, &sk.x,
+ NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_printmpi ("elg_decrypt p", sk.p);
+ log_printmpi ("elg_decrypt g", sk.g);
+ log_printmpi ("elg_decrypt y", sk.y);
+ if (!fips_mode ())
+ log_printmpi ("elg_decrypt x", sk.x);
+ }
+
+ plain = mpi_snew (ctx.nbits);
+ decrypt (plain, data_a, data_b, &sk);
+ if (DBG_CIPHER)
+ log_printmpi ("elg_decrypt res", plain);
+
+ /* Reverse the encoding and build the s-expression. */
+ switch (ctx.encoding)
+ {
+ case PUBKEY_ENC_PKCS1:
+ rc = _gcry_rsa_pkcs1_decode_for_enc (&unpad, &unpadlen, ctx.nbits, plain);
+ mpi_free (plain); plain = NULL;
+ if (!rc)
+ rc = sexp_build (r_plain, NULL, "(value %b)", (int)unpadlen, unpad);
+ break;
+
+ case PUBKEY_ENC_OAEP:
+ rc = _gcry_rsa_oaep_decode (&unpad, &unpadlen,
+ ctx.nbits, ctx.hash_algo, plain,
+ ctx.label, ctx.labellen);
+ mpi_free (plain); plain = NULL;
+ if (!rc)
+ rc = sexp_build (r_plain, NULL, "(value %b)", (int)unpadlen, unpad);
+ break;
+
+ default:
+ /* Raw format. For backward compatibility we need to assume a
+ signed mpi by using the sexp format string "%m". */
+ rc = sexp_build (r_plain, NULL,
+ (ctx.flags & PUBKEY_FLAG_LEGACYRESULT)
+ ? "%m" : "(value %m)",
+ plain);
+ break;
+ }
+
+
+ leave:
+ xfree (unpad);
+ _gcry_mpi_release (plain);
+ _gcry_mpi_release (sk.p);
+ _gcry_mpi_release (sk.g);
+ _gcry_mpi_release (sk.y);
+ _gcry_mpi_release (sk.x);
+ _gcry_mpi_release (data_a);
+ _gcry_mpi_release (data_b);
+ sexp_release (l1);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("elg_decrypt => %s\n", gpg_strerror (rc));
+ return rc;
+}
+
+
+static gcry_err_code_t
+elg_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
+{
+ gcry_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_mpi_t data = NULL;
+ ELG_secret_key sk = {NULL, NULL, NULL, NULL};
+ gcry_mpi_t sig_r = NULL;
+ gcry_mpi_t sig_s = NULL;
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN,
+ elg_get_nbits (keyparms));
+
+ /* Extract the data. */
+ rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_mpidump ("elg_sign data", data);
+ if (mpi_is_opaque (data))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ /* Extract the key. */
+ rc = sexp_extract_param (keyparms, NULL, "pgyx",
+ &sk.p, &sk.g, &sk.y, &sk.x, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("elg_sign p", sk.p);
+ log_mpidump ("elg_sign g", sk.g);
+ log_mpidump ("elg_sign y", sk.y);
+ if (!fips_mode ())
+ log_mpidump ("elg_sign x", sk.x);
+ }
+
+ sig_r = mpi_new (0);
+ sig_s = mpi_new (0);
+ sign (sig_r, sig_s, data, &sk);
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("elg_sign sig_r", sig_r);
+ log_mpidump ("elg_sign sig_s", sig_s);
+ }
+ rc = sexp_build (r_sig, NULL, "(sig-val(elg(r%M)(s%M)))", sig_r, sig_s);
+
+ leave:
+ _gcry_mpi_release (sig_r);
+ _gcry_mpi_release (sig_s);
+ _gcry_mpi_release (sk.p);
+ _gcry_mpi_release (sk.g);
+ _gcry_mpi_release (sk.y);
+ _gcry_mpi_release (sk.x);
+ _gcry_mpi_release (data);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("elg_sign => %s\n", gpg_strerror (rc));
+ return rc;
+}
+
+
+static gcry_err_code_t
+elg_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms)
+{
+ gcry_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_sexp_t l1 = NULL;
+ gcry_mpi_t sig_r = NULL;
+ gcry_mpi_t sig_s = NULL;
+ gcry_mpi_t data = NULL;
+ ELG_public_key pk = { NULL, NULL, NULL };
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY,
+ elg_get_nbits (s_keyparms));
+
+ /* Extract the data. */
+ rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_mpidump ("elg_verify data", data);
+ if (mpi_is_opaque (data))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ /* Extract the signature value. */
+ rc = _gcry_pk_util_preparse_sigval (s_sig, elg_names, &l1, NULL);
+ if (rc)
+ goto leave;
+ rc = sexp_extract_param (l1, NULL, "rs", &sig_r, &sig_s, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("elg_verify s_r", sig_r);
+ log_mpidump ("elg_verify s_s", sig_s);
+ }
+
+ /* Extract the key. */
+ rc = sexp_extract_param (s_keyparms, NULL, "pgy",
+ &pk.p, &pk.g, &pk.y, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("elg_verify p", pk.p);
+ log_mpidump ("elg_verify g", pk.g);
+ log_mpidump ("elg_verify y", pk.y);
+ }
+
+ /* Verify the signature. */
+ if (!verify (sig_r, sig_s, data, &pk))
+ rc = GPG_ERR_BAD_SIGNATURE;
+
+ leave:
+ _gcry_mpi_release (pk.p);
+ _gcry_mpi_release (pk.g);
+ _gcry_mpi_release (pk.y);
+ _gcry_mpi_release (data);
+ _gcry_mpi_release (sig_r);
+ _gcry_mpi_release (sig_s);
+ sexp_release (l1);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("elg_verify => %s\n", rc?gpg_strerror (rc):"Good");
+ return rc;
+}
+
+
+/* Return the number of bits for the key described by PARMS. On error
+ * 0 is returned. The format of PARMS starts with the algorithm name;
+ * for example:
+ *
+ *   (elg
+ * (p <mpi>)
+ * (g <mpi>)
+ * (y <mpi>))
+ *
+ * More parameters may be given but we only need P here.
+ */
+static unsigned int
+elg_get_nbits (gcry_sexp_t parms)
+{
+ gcry_sexp_t l1;
+ gcry_mpi_t p;
+ unsigned int nbits;
+
+ l1 = sexp_find_token (parms, "p", 1);
+ if (!l1)
+ return 0; /* Parameter P not found. */
+
+  p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+ sexp_release (l1);
+ nbits = p? mpi_get_nbits (p) : 0;
+ _gcry_mpi_release (p);
+ return nbits;
+}
+
+
+
+gcry_pk_spec_t _gcry_pubkey_spec_elg =
+ {
+ GCRY_PK_ELG, { 0, 0 },
+ (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR),
+ "ELG", elg_names,
+ "pgy", "pgyx", "ab", "rs", "pgy",
+ elg_generate,
+ elg_check_secret_key,
+ elg_encrypt,
+ elg_decrypt,
+ elg_sign,
+ elg_verify,
+ elg_get_nbits,
+ };
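
A hedged end-to-end sketch of how these entry points are exercised through the public libgcrypt API.  The 1024-bit size, the raw-MPI message and the missing cleanup are illustrative shortcuts only, not a recommended usage pattern.

    #include <gcrypt.h>

    /* Sketch: generate an ElGamal key and run an encrypt/decrypt round trip. */
    static void
    elg_demo (void)
    {
      gcry_sexp_t parms, keypair, pub, sec, data, ciph, plain;
      gcry_mpi_t m = gcry_mpi_set_ui (NULL, 42);

      if (gcry_sexp_build (&parms, NULL, "(genkey (elg (nbits 4:1024)))"))
        return;
      if (gcry_pk_genkey (&keypair, parms))
        return;
      pub = gcry_sexp_find_token (keypair, "public-key", 0);
      sec = gcry_sexp_find_token (keypair, "private-key", 0);

      if (!gcry_sexp_build (&data, NULL, "(data (flags raw) (value %m))", m)
          && !gcry_pk_encrypt (&ciph, data, pub)     /* reaches elg_encrypt */
          && !gcry_pk_decrypt (&plain, ciph, sec))   /* reaches elg_decrypt */
        {
          /* plain now holds the recovered value. */
        }
    }
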
diff --git a/comm/third_party/libgcrypt/cipher/gost-s-box.c b/comm/third_party/libgcrypt/cipher/gost-s-box.c
new file mode 100644
index 0000000000..5d5ed7dc44
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/gost-s-box.c
@@ -0,0 +1,266 @@
+/* gost-s-box.c - GOST 28147-89 S-Box expander
+ * Copyright (C) 2013 Dmitry Eremin-Solenikov
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define DIM(v) (sizeof(v)/sizeof((v)[0]))
+
+struct gost_sbox
+{
+ const char *name;
+ const char *oid;
+ unsigned int keymeshing;
+ unsigned char sbox[16*8];
+} gost_sboxes[] = {
+ { "test_3411", "1.2.643.2.2.30.0", 0,
+ {
+ 0x4, 0xE, 0x5, 0x7, 0x6, 0x4, 0xD, 0x1,
+ 0xA, 0xB, 0x8, 0xD, 0xC, 0xB, 0xB, 0xF,
+ 0x9, 0x4, 0x1, 0xA, 0x7, 0xA, 0x4, 0xD,
+ 0x2, 0xC, 0xD, 0x1, 0x1, 0x0, 0x1, 0x0,
+
+ 0xD, 0x6, 0xA, 0x0, 0x5, 0x7, 0x3, 0x5,
+ 0x8, 0xD, 0x3, 0x8, 0xF, 0x2, 0xF, 0x7,
+ 0x0, 0xF, 0x4, 0x9, 0xD, 0x1, 0x5, 0xA,
+ 0xE, 0xA, 0x2, 0xF, 0x8, 0xD, 0x9, 0x4,
+
+ 0x6, 0x2, 0xE, 0xE, 0x4, 0x3, 0x0, 0x9,
+ 0xB, 0x3, 0xF, 0x4, 0xA, 0x6, 0xA, 0x2,
+ 0x1, 0x8, 0xC, 0x6, 0x9, 0x8, 0xE, 0x3,
+ 0xC, 0x1, 0x7, 0xC, 0xE, 0x5, 0x7, 0xE,
+
+ 0x7, 0x0, 0x6, 0xB, 0x0, 0x9, 0x6, 0x6,
+ 0xF, 0x7, 0x0, 0x2, 0x3, 0xC, 0x8, 0xB,
+ 0x5, 0x5, 0x9, 0x5, 0xB, 0xF, 0x2, 0x8,
+ 0x3, 0x9, 0xB, 0x3, 0x2, 0xE, 0xC, 0xC,
+ }
+ },
+ { "CryptoPro_3411", "1.2.643.2.2.30.1", 0,
+ {
+ 0xA, 0x5, 0x7, 0x4, 0x7, 0x7, 0xD, 0x1,
+ 0x4, 0xF, 0xF, 0xA, 0x6, 0x6, 0xE, 0x3,
+ 0x5, 0x4, 0xC, 0x7, 0x4, 0x2, 0x4, 0xA,
+ 0x6, 0x0, 0xE, 0xC, 0xB, 0x4, 0x1, 0x9,
+
+ 0x8, 0x2, 0x9, 0x0, 0x9, 0xD, 0x7, 0x5,
+ 0x1, 0xD, 0x4, 0xF, 0xC, 0x9, 0x0, 0xB,
+ 0x3, 0xB, 0x1, 0x2, 0x2, 0xF, 0x5, 0x4,
+ 0x7, 0x9, 0x0, 0x8, 0xA, 0x0, 0xA, 0xF,
+
+ 0xD, 0x1, 0x3, 0xE, 0x1, 0xA, 0x3, 0x8,
+ 0xC, 0x7, 0xB, 0x1, 0x8, 0x1, 0xC, 0x6,
+ 0xE, 0x6, 0x5, 0x6, 0x0, 0x5, 0x8, 0x7,
+ 0x0, 0x3, 0x2, 0x5, 0xE, 0xB, 0xF, 0xE,
+
+ 0x9, 0xC, 0x6, 0xD, 0xF, 0x8, 0x6, 0xD,
+ 0x2, 0xE, 0xA, 0xB, 0xD, 0xE, 0x2, 0x0,
+ 0xB, 0xA, 0x8, 0x9, 0x3, 0xC, 0x9, 0x2,
+ 0xF, 0x8, 0xD, 0x3, 0x5, 0x3, 0xB, 0xC,
+ }
+ },
+ { "Test_89", "1.2.643.2.2.31.0", 0,
+ {
+ 0x4, 0xC, 0xD, 0xE, 0x3, 0x8, 0x9, 0xC,
+ 0x2, 0x9, 0x8, 0x9, 0xE, 0xF, 0xB, 0x6,
+ 0xF, 0xF, 0xE, 0xB, 0x5, 0x6, 0xC, 0x5,
+ 0x5, 0xE, 0xC, 0x2, 0x9, 0xB, 0x0, 0x2,
+
+ 0x9, 0x8, 0x7, 0x5, 0x6, 0x1, 0x3, 0xB,
+ 0x1, 0x1, 0x3, 0xF, 0x8, 0x9, 0x6, 0x0,
+ 0x0, 0x3, 0x9, 0x7, 0x0, 0xC, 0x7, 0x9,
+ 0x8, 0xA, 0xA, 0x1, 0xD, 0x5, 0x5, 0xD,
+
+ 0xE, 0x2, 0x1, 0x0, 0xA, 0xD, 0x4, 0x3,
+ 0x3, 0x7, 0x5, 0xD, 0xB, 0x3, 0x8, 0xE,
+ 0xB, 0x4, 0x2, 0xC, 0x7, 0x7, 0xE, 0x7,
+ 0xC, 0xD, 0x4, 0x6, 0xC, 0xA, 0xF, 0xA,
+
+ 0xD, 0x6, 0x6, 0xA, 0x2, 0x0, 0x1, 0xF,
+ 0x7, 0x0, 0xF, 0x4, 0x1, 0xE, 0xA, 0x4,
+ 0xA, 0xB, 0x0, 0x3, 0xF, 0x2, 0x2, 0x1,
+ 0x6, 0x5, 0xB, 0x8, 0x4, 0x4, 0xD, 0x8,
+ }
+ },
+ { "CryptoPro_A", "1.2.643.2.2.31.1", 1,
+ {
+ 0x9, 0x3, 0xE, 0xE, 0xB, 0x3, 0x1, 0xB,
+ 0x6, 0x7, 0x4, 0x7, 0x5, 0xA, 0xD, 0xA,
+ 0x3, 0xE, 0x6, 0xA, 0x1, 0xD, 0x2, 0xF,
+ 0x2, 0x9, 0x2, 0xC, 0x9, 0xC, 0x9, 0x5,
+
+ 0x8, 0x8, 0xB, 0xD, 0x8, 0x1, 0x7, 0x0,
+ 0xB, 0xA, 0x3, 0x1, 0xD, 0x2, 0xA, 0xC,
+ 0x1, 0xF, 0xD, 0x3, 0xF, 0x0, 0x6, 0xE,
+ 0x7, 0x0, 0x8, 0x9, 0x0, 0xB, 0x0, 0x8,
+
+ 0xA, 0x5, 0xC, 0x0, 0xE, 0x7, 0x8, 0x6,
+ 0x4, 0x2, 0xF, 0x2, 0x4, 0x5, 0xC, 0x2,
+ 0xE, 0x6, 0x5, 0xB, 0x2, 0x9, 0x4, 0x3,
+ 0xF, 0xC, 0xA, 0x4, 0x3, 0x4, 0x5, 0x9,
+
+ 0xC, 0xB, 0x0, 0xF, 0xC, 0x8, 0xF, 0x1,
+ 0x0, 0x4, 0x7, 0x8, 0x7, 0xF, 0x3, 0x7,
+ 0xD, 0xD, 0x1, 0x5, 0xA, 0xE, 0xB, 0xD,
+ 0x5, 0x1, 0x9, 0x6, 0x6, 0x6, 0xE, 0x4,
+ }
+ },
+ { "CryptoPro_B", "1.2.643.2.2.31.2", 1,
+ {
+ 0x8, 0x0, 0xE, 0x7, 0x2, 0x8, 0x5, 0x0,
+ 0x4, 0x1, 0xC, 0x5, 0x7, 0x3, 0x2, 0x4,
+ 0xB, 0x2, 0x0, 0x0, 0xC, 0x2, 0xA, 0xB,
+ 0x1, 0xA, 0xA, 0xD, 0xF, 0x6, 0xB, 0xE,
+
+ 0x3, 0x4, 0x9, 0xB, 0x9, 0x4, 0x9, 0x8,
+ 0x5, 0xD, 0x2, 0x6, 0x5, 0xD, 0x1, 0x3,
+ 0x0, 0x5, 0xD, 0x1, 0xA, 0xE, 0xC, 0x7,
+ 0x9, 0xC, 0xB, 0x2, 0xB, 0xB, 0x3, 0x1,
+
+ 0x2, 0x9, 0x7, 0x3, 0x1, 0xC, 0x7, 0xA,
+ 0xE, 0x7, 0x5, 0xA, 0x4, 0x1, 0x4, 0x2,
+ 0xA, 0x3, 0x8, 0xC, 0x0, 0x7, 0xD, 0x9,
+ 0xC, 0xF, 0xF, 0xF, 0xD, 0xF, 0x0, 0x6,
+
+ 0xD, 0xB, 0x3, 0x4, 0x6, 0xA, 0x6, 0xF,
+ 0x6, 0x8, 0x6, 0xE, 0x8, 0x0, 0xF, 0xD,
+ 0x7, 0x6, 0x1, 0x9, 0xE, 0x9, 0x8, 0x5,
+ 0xF, 0xE, 0x4, 0x8, 0x3, 0x5, 0xE, 0xC,
+ }
+ },
+ { "CryptoPro_C", "1.2.643.2.2.31.3", 1,
+ {
+ 0x1, 0x0, 0x8, 0x3, 0x8, 0xC, 0xA, 0x7,
+ 0xB, 0x1, 0x2, 0x6, 0xD, 0x9, 0x9, 0x4,
+ 0xC, 0x7, 0x5, 0x0, 0xB, 0xB, 0x6, 0x0,
+ 0x2, 0xD, 0x0, 0x1, 0x0, 0x1, 0x8, 0x5,
+
+ 0x9, 0xB, 0x4, 0x5, 0x4, 0x8, 0xD, 0xA,
+ 0xD, 0x4, 0x9, 0xD, 0x5, 0xE, 0xE, 0x2,
+ 0x0, 0x5, 0xF, 0xA, 0x1, 0x2, 0x2, 0xF,
+ 0xF, 0x2, 0xA, 0x8, 0x2, 0x4, 0x0, 0xE,
+
+ 0x4, 0x8, 0x3, 0xB, 0x9, 0x7, 0xF, 0xC,
+ 0x5, 0xE, 0x7, 0x2, 0x3, 0x3, 0x3, 0x6,
+ 0x8, 0xF, 0xC, 0x9, 0xC, 0x6, 0x5, 0x1,
+ 0xE, 0xC, 0xD, 0x7, 0xE, 0x5, 0xB, 0xB,
+
+ 0xA, 0x9, 0x6, 0xE, 0x6, 0xA, 0x4, 0xD,
+ 0x7, 0xA, 0xE, 0xF, 0xF, 0x0, 0x1, 0x9,
+ 0x6, 0x6, 0x1, 0xC, 0xA, 0xF, 0xC, 0x3,
+ 0x3, 0x3, 0xB, 0x4, 0x7, 0xD, 0x7, 0x8,
+ }
+ },
+ { "CryptoPro_D", "1.2.643.2.2.31.4", 1,
+ {
+ 0xF, 0xB, 0x1, 0x1, 0x0, 0x8, 0x3, 0x1,
+ 0xC, 0x6, 0xC, 0x5, 0xC, 0x0, 0x0, 0xA,
+ 0x2, 0x3, 0xB, 0xE, 0x8, 0xF, 0x6, 0x6,
+ 0xA, 0x4, 0x0, 0xC, 0x9, 0x3, 0xF, 0x8,
+
+ 0x6, 0xC, 0xF, 0xA, 0xD, 0x2, 0x1, 0xF,
+ 0x4, 0xF, 0xE, 0x7, 0x2, 0x5, 0xE, 0xB,
+ 0x5, 0xE, 0x6, 0x0, 0xA, 0xE, 0x9, 0x0,
+ 0x0, 0x2, 0x5, 0xD, 0xB, 0xB, 0x2, 0x4,
+
+ 0x7, 0x7, 0xA, 0x6, 0x7, 0x1, 0xD, 0xC,
+ 0x9, 0xD, 0xD, 0x2, 0x3, 0xA, 0x8, 0x3,
+ 0xE, 0x8, 0x4, 0xB, 0x6, 0x4, 0xC, 0x5,
+ 0xD, 0x0, 0x8, 0x4, 0x5, 0x7, 0x4, 0x9,
+
+ 0x1, 0x5, 0x9, 0x9, 0x4, 0xC, 0xB, 0x7,
+ 0xB, 0xA, 0x3, 0x3, 0xE, 0x9, 0xA, 0xD,
+ 0x8, 0x9, 0x7, 0xF, 0xF, 0xD, 0x5, 0x2,
+ 0x3, 0x1, 0x2, 0x8, 0x1, 0x6, 0x7, 0xE,
+ }
+ },
+ { "TC26_Z", "1.2.643.7.1.2.5.1.1", 1,
+ {
+ 0xc, 0x6, 0xb, 0xc, 0x7, 0x5, 0x8, 0x1,
+ 0x4, 0x8, 0x3, 0x8, 0xf, 0xd, 0xe, 0x7,
+ 0x6, 0x2, 0x5, 0x2, 0x5, 0xf, 0x2, 0xe,
+ 0x2, 0x3, 0x8, 0x1, 0xa, 0x6, 0x5, 0xd,
+
+ 0xa, 0x9, 0x2, 0xd, 0x8, 0x9, 0x6, 0x0,
+ 0x5, 0xa, 0xf, 0x4, 0x1, 0x2, 0x9, 0x5,
+ 0xb, 0x5, 0xa, 0xf, 0x6, 0xc, 0x1, 0x8,
+ 0x9, 0xc, 0xd, 0x6, 0xd, 0xa, 0xc, 0x3,
+
+ 0xe, 0x1, 0xe, 0x7, 0x0, 0xb, 0xf, 0x4,
+ 0x8, 0xe, 0x1, 0x0, 0x9, 0x7, 0x4, 0xf,
+ 0xd, 0x4, 0x7, 0xa, 0x3, 0x8, 0xb, 0xa,
+ 0x7, 0x7, 0x4, 0x5, 0xe, 0x1, 0x0, 0x6,
+
+ 0x0, 0xb, 0xc, 0x3, 0xb, 0x4, 0xd, 0x9,
+ 0x3, 0xd, 0x9, 0xe, 0x4, 0x3, 0xa, 0xc,
+ 0xf, 0x0, 0x6, 0x9, 0x2, 0xe, 0x3, 0xb,
+ 0x1, 0xf, 0x0, 0xb, 0xc, 0x0, 0x7, 0x2,
+ }
+ },
+};
+
+int main(int argc, char **argv)
+{
+ unsigned int i, j, s;
+ FILE *f;
+
+ if (argc == 1)
+ f = stdin;
+ else
+ f = fopen(argv[1], "w");
+
+ if (!f)
+ {
+ perror("fopen");
+ exit(1);
+ }
+
+ for (s = 0; s < DIM(gost_sboxes); s++)
+ {
+ unsigned char *sbox = gost_sboxes[s].sbox;
+ fprintf (f, "static const u32 sbox_%s[4*256] =\n {", gost_sboxes[s].name);
+ for (i = 0; i < 4; i++) {
+ fprintf (f, "\n /* %d */\n ", i);
+ for (j = 0; j < 256; j++) {
+ unsigned int val;
+ if (j % 4 == 0 && j != 0)
+ fprintf (f, "\n ");
+ val = sbox[ (j & 0xf) * 8 + 2 * i + 0] |
+ (sbox[ (j >> 4) * 8 + 2 * i + 1] << 4);
+ val <<= (8*i);
+ val = (val << 11) | (val >> 21);
+ fprintf (f, " 0x%08x,", val);
+ }
+ }
+ fprintf (f, "\n };\n\n");
+ }
+
+ fprintf (f, "static struct\n{\n const char *oid;\n const u32 *sbox;\n const int keymeshing;\n} gost_oid_map[] = {\n");
+
+ for (s = 0; s < DIM(gost_sboxes); s++)
+ {
+ fprintf (f, " { \"%s\", sbox_%s, %d },\n", gost_sboxes[s].oid, gost_sboxes[s].name, gost_sboxes[s].keymeshing );
+ }
+
+ fprintf(f, " { NULL, NULL, 0 }\n};\n");
+
+ fclose (f);
+
+ return 0;
+}
diff --git a/comm/third_party/libgcrypt/cipher/gost.h b/comm/third_party/libgcrypt/cipher/gost.h
new file mode 100644
index 0000000000..53a4050503
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/gost.h
@@ -0,0 +1,34 @@
+/* gost.h - GOST 28147-89 implementation
+ * Copyright (C) 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _GCRY_GOST_H
+#define _GCRY_GOST_H
+
+typedef struct {
+ u32 key[8];
+ const u32 *sbox;
+ unsigned int mesh_counter;
+ unsigned int mesh_limit;
+} GOST28147_context;
+
+/* This is a simple interface that will be used by GOST R 34.11-94 */
+unsigned int _gcry_gost_enc_data (const u32 *key,
+ u32 *o1, u32 *o2, u32 n1, u32 n2, int cryptopro);
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/gost28147.c b/comm/third_party/libgcrypt/cipher/gost28147.c
new file mode 100644
index 0000000000..9445b378c4
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/gost28147.c
@@ -0,0 +1,553 @@
+/* gost28147.c - GOST 28147-89 implementation for Libgcrypt
+ * Copyright (C) 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* GOST 28147-89 defines several modes of encryption:
+ * - ECB which should be used only for key transfer
+ * - CFB mode
+ * - OFB-like mode with additional transformation on keystream
+ * RFC 5830 names this 'counter encryption' mode
+ * Original GOST text uses the term 'gammirovanie'
+ * - MAC mode ('imitovstavka')
+ *
+ * This implementation handles ECB and CFB modes via usual libgcrypt handling.
+ * OFB-like modes are unsupported.
+ */
+
+#include <config.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "mac-internal.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+
+#include "gost.h"
+#include "gost-sb.h"
+
+static void
+gost_do_set_sbox (GOST28147_context *ctx, unsigned int index)
+{
+ ctx->sbox = gost_oid_map[index].sbox;
+ ctx->mesh_limit = gost_oid_map[index].keymeshing ? 1024 : 0;
+}
+
+static gcry_err_code_t
+gost_setkey (void *c, const byte *key, unsigned keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ int i;
+ GOST28147_context *ctx = c;
+
+ (void)bulk_ops;
+
+ if (keylen != 256 / 8)
+ return GPG_ERR_INV_KEYLEN;
+
+ if (!ctx->sbox)
+ gost_do_set_sbox (ctx, 0);
+
+ for (i = 0; i < 8; i++)
+ {
+ ctx->key[i] = buf_get_le32(&key[4*i]);
+ }
+
+ ctx->mesh_counter = 0;
+
+ return GPG_ERR_NO_ERROR;
+}
+
+static inline u32
+gost_val (u32 subkey, u32 cm1, const u32 *sbox)
+{
+ cm1 += subkey;
+ cm1 = sbox[0*256 + ((cm1 >> 0) & 0xff)] |
+ sbox[1*256 + ((cm1 >> 8) & 0xff)] |
+ sbox[2*256 + ((cm1 >> 16) & 0xff)] |
+ sbox[3*256 + ((cm1 >> 24) & 0xff)];
+ return cm1;
+}
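
For reference, gost_val relies on tables pre-expanded by gost-s-box.c, which fold the eight 4-bit substitutions and the 11-bit left rotation of the GOST round function into four byte-indexed lookups.  An unoptimized equivalent, assuming the raw 4-bit boxes were available in a hypothetical sbox4[8][16] layout (S-box i applied to nibble i), would look like this:

    static u32
    gost_val_ref (u32 subkey, u32 cm1, const unsigned char sbox4[8][16])
    {
      u32 r = 0;
      int i;

      /* Add the subkey mod 2^32, substitute each 4-bit nibble through
         its S-box, then rotate the 32-bit result left by 11 bits. */
      cm1 += subkey;
      for (i = 0; i < 8; i++)
        r |= (u32) sbox4[i][(cm1 >> (4 * i)) & 0xf] << (4 * i);

      return (r << 11) | (r >> 21);
    }
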
+
+static unsigned int
+_gost_encrypt_data (const u32 *sbox, const u32 *key, u32 *o1, u32 *o2, u32 n1, u32 n2)
+{
+ n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox);
+ n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox);
+ n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox);
+ n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox);
+
+ n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox);
+ n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox);
+ n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox);
+ n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox);
+
+ n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox);
+ n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox);
+ n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox);
+ n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox);
+
+ n2 ^= gost_val (key[7], n1, sbox); n1 ^= gost_val (key[6], n2, sbox);
+ n2 ^= gost_val (key[5], n1, sbox); n1 ^= gost_val (key[4], n2, sbox);
+ n2 ^= gost_val (key[3], n1, sbox); n1 ^= gost_val (key[2], n2, sbox);
+ n2 ^= gost_val (key[1], n1, sbox); n1 ^= gost_val (key[0], n2, sbox);
+
+ *o1 = n2;
+ *o2 = n1;
+
+ return /* burn_stack */ 4*sizeof(void*) /* func call */ +
+ 3*sizeof(void*) /* stack */ +
+ 4*sizeof(void*) /* gost_val call */;
+}
+
+static unsigned int
+gost_encrypt_block (void *c, byte *outbuf, const byte *inbuf)
+{
+ GOST28147_context *ctx = c;
+ u32 n1, n2;
+ unsigned int burn;
+
+ n1 = buf_get_le32 (inbuf);
+ n2 = buf_get_le32 (inbuf+4);
+
+ burn = _gost_encrypt_data(ctx->sbox, ctx->key, &n1, &n2, n1, n2);
+
+ buf_put_le32 (outbuf+0, n1);
+ buf_put_le32 (outbuf+4, n2);
+
+ return /* burn_stack */ burn + 6*sizeof(void*) /* func call */;
+}
+
+unsigned int _gcry_gost_enc_data (const u32 *key,
+ u32 *o1, u32 *o2, u32 n1, u32 n2, int cryptopro)
+{
+ const u32 *sbox;
+ if (cryptopro)
+ sbox = sbox_CryptoPro_3411;
+ else
+ sbox = sbox_test_3411;
+ return _gost_encrypt_data (sbox, key, o1, o2, n1, n2) + 7 * sizeof(void *);
+}
+
+static unsigned int
+gost_decrypt_block (void *c, byte *outbuf, const byte *inbuf)
+{
+ GOST28147_context *ctx = c;
+ u32 n1, n2;
+ const u32 *sbox = ctx->sbox;
+
+ n1 = buf_get_le32 (inbuf);
+ n2 = buf_get_le32 (inbuf+4);
+
+ n2 ^= gost_val (ctx->key[0], n1, sbox); n1 ^= gost_val (ctx->key[1], n2, sbox);
+ n2 ^= gost_val (ctx->key[2], n1, sbox); n1 ^= gost_val (ctx->key[3], n2, sbox);
+ n2 ^= gost_val (ctx->key[4], n1, sbox); n1 ^= gost_val (ctx->key[5], n2, sbox);
+ n2 ^= gost_val (ctx->key[6], n1, sbox); n1 ^= gost_val (ctx->key[7], n2, sbox);
+
+ n2 ^= gost_val (ctx->key[7], n1, sbox); n1 ^= gost_val (ctx->key[6], n2, sbox);
+ n2 ^= gost_val (ctx->key[5], n1, sbox); n1 ^= gost_val (ctx->key[4], n2, sbox);
+ n2 ^= gost_val (ctx->key[3], n1, sbox); n1 ^= gost_val (ctx->key[2], n2, sbox);
+ n2 ^= gost_val (ctx->key[1], n1, sbox); n1 ^= gost_val (ctx->key[0], n2, sbox);
+
+ n2 ^= gost_val (ctx->key[7], n1, sbox); n1 ^= gost_val (ctx->key[6], n2, sbox);
+ n2 ^= gost_val (ctx->key[5], n1, sbox); n1 ^= gost_val (ctx->key[4], n2, sbox);
+ n2 ^= gost_val (ctx->key[3], n1, sbox); n1 ^= gost_val (ctx->key[2], n2, sbox);
+ n2 ^= gost_val (ctx->key[1], n1, sbox); n1 ^= gost_val (ctx->key[0], n2, sbox);
+
+ n2 ^= gost_val (ctx->key[7], n1, sbox); n1 ^= gost_val (ctx->key[6], n2, sbox);
+ n2 ^= gost_val (ctx->key[5], n1, sbox); n1 ^= gost_val (ctx->key[4], n2, sbox);
+ n2 ^= gost_val (ctx->key[3], n1, sbox); n1 ^= gost_val (ctx->key[2], n2, sbox);
+ n2 ^= gost_val (ctx->key[1], n1, sbox); n1 ^= gost_val (ctx->key[0], n2, sbox);
+
+ buf_put_le32 (outbuf+0, n2);
+ buf_put_le32 (outbuf+4, n1);
+
+ return /* burn_stack */ 4*sizeof(void*) /* func call */ +
+ 3*sizeof(void*) /* stack */ +
+ 4*sizeof(void*) /* gost_val call */;
+}
+
+static gpg_err_code_t
+gost_set_sbox (GOST28147_context *ctx, const char *oid)
+{
+ int i;
+
+ for (i = 0; gost_oid_map[i].oid; i++)
+ {
+ if (!strcmp(gost_oid_map[i].oid, oid))
+ {
+ gost_do_set_sbox (ctx, i);
+ return 0;
+ }
+ }
+ return GPG_ERR_VALUE_NOT_FOUND;
+}
+
+static gpg_err_code_t
+gost_set_extra_info (void *c, int what, const void *buffer, size_t buflen)
+{
+ GOST28147_context *ctx = c;
+ gpg_err_code_t ec = 0;
+
+ (void)buffer;
+ (void)buflen;
+
+ switch (what)
+ {
+ case GCRYCTL_SET_SBOX:
+ ec = gost_set_sbox (ctx, buffer);
+ break;
+
+ default:
+ ec = GPG_ERR_INV_OP;
+ break;
+ }
+ return ec;
+}
+
+static const byte CryptoProKeyMeshingKey[] = {
+ 0x69, 0x00, 0x72, 0x22, 0x64, 0xC9, 0x04, 0x23,
+ 0x8D, 0x3A, 0xDB, 0x96, 0x46, 0xE9, 0x2A, 0xC4,
+ 0x18, 0xFE, 0xAC, 0x94, 0x00, 0xED, 0x07, 0x12,
+ 0xC0, 0x86, 0xDC, 0xC2, 0xEF, 0x4C, 0xA9, 0x2B
+};
+
+/* Implements the CryptoPro key meshing algorithm by modifying the key in
+   CTX; the corresponding IV transformation is done by the caller.
+   Thanks to Dmitry Belyavskiy. */
+static void
+cryptopro_key_meshing (GOST28147_context *ctx)
+{
+ unsigned char newkey[32];
+ unsigned int i;
+
+ /* "Decrypt" the static keymeshing key */
+ for (i = 0; i < 4; i++)
+ {
+ gost_decrypt_block (ctx, newkey + i*8, CryptoProKeyMeshingKey + i*8);
+ }
+
+ /* Set new key */
+ for (i = 0; i < 8; i++)
+ {
+ ctx->key[i] = buf_get_le32(&newkey[4*i]);
+ }
+
+ ctx->mesh_counter = 0;
+}
+
+static unsigned int
+gost_encrypt_block_mesh (void *c, byte *outbuf, const byte *inbuf)
+{
+ GOST28147_context *ctx = c;
+ u32 n1, n2;
+ unsigned int burn;
+
+ n1 = buf_get_le32 (inbuf);
+ n2 = buf_get_le32 (inbuf+4);
+
+ if (ctx->mesh_limit && (ctx->mesh_counter == ctx->mesh_limit))
+ {
+ cryptopro_key_meshing (ctx);
+ /* Yes, encrypt twice: once for KeyMeshing procedure per RFC 4357,
+ * once for block encryption */
+ _gost_encrypt_data(ctx->sbox, ctx->key, &n1, &n2, n1, n2);
+ }
+
+ burn = _gost_encrypt_data(ctx->sbox, ctx->key, &n1, &n2, n1, n2);
+
+ ctx->mesh_counter += 8;
+
+ buf_put_le32 (outbuf+0, n1);
+ buf_put_le32 (outbuf+4, n2);
+
+ return /* burn_stack */ burn + 6*sizeof(void*) /* func call */;
+}
+
+static gcry_cipher_oid_spec_t oids_gost28147_mesh[] =
+ {
+ { "1.2.643.2.2.21", GCRY_CIPHER_MODE_CFB },
+ /* { "1.2.643.2.2.31.0", GCRY_CIPHER_MODE_CNTGOST }, */
+ { "1.2.643.2.2.31.1", GCRY_CIPHER_MODE_CFB },
+ { "1.2.643.2.2.31.2", GCRY_CIPHER_MODE_CFB },
+ { "1.2.643.2.2.31.3", GCRY_CIPHER_MODE_CFB },
+ { "1.2.643.2.2.31.4", GCRY_CIPHER_MODE_CFB },
+ { NULL }
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_gost28147 =
+ {
+ GCRY_CIPHER_GOST28147, {0, 0},
+ "GOST28147", NULL, NULL, 8, 256,
+ sizeof (GOST28147_context),
+ gost_setkey,
+ gost_encrypt_block,
+ gost_decrypt_block,
+ NULL, NULL, NULL, gost_set_extra_info,
+ };
+
+/* Meshing is used only for CFB, so there is no need for a separate
+ * gost_decrypt_block_mesh.
+ * Moreover key meshing is specified as encrypting the block (IV). Decrypting
+ * it afterwards would be meaningless. */
+gcry_cipher_spec_t _gcry_cipher_spec_gost28147_mesh =
+ {
+ GCRY_CIPHER_GOST28147_MESH, {0, 0},
+ "GOST28147_MESH", NULL, oids_gost28147_mesh, 8, 256,
+ sizeof (GOST28147_context),
+ gost_setkey,
+ gost_encrypt_block_mesh,
+ gost_decrypt_block,
+ NULL, NULL, NULL, gost_set_extra_info,
+ };
+
+static gcry_err_code_t
+gost_imit_open (gcry_mac_hd_t h)
+{
+ memset(&h->u.imit, 0, sizeof(h->u.imit));
+ return 0;
+}
+
+static void
+gost_imit_close (gcry_mac_hd_t h)
+{
+ (void) h;
+}
+
+static gcry_err_code_t
+gost_imit_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen)
+{
+ int i;
+
+ if (keylen != 256 / 8)
+ return GPG_ERR_INV_KEYLEN;
+
+ if (!h->u.imit.ctx.sbox)
+ h->u.imit.ctx.sbox = sbox_CryptoPro_A;
+
+ for (i = 0; i < 8; i++)
+ {
+ h->u.imit.ctx.key[i] = buf_get_le32(&key[4*i]);
+ }
+
+ return 0;
+}
+
+static gcry_err_code_t
+gost_imit_setiv (gcry_mac_hd_t h,
+ const unsigned char *iv,
+ size_t ivlen)
+{
+ if (ivlen != 8)
+ return GPG_ERR_INV_LENGTH;
+
+ h->u.imit.n1 = buf_get_le32 (iv + 0);
+ h->u.imit.n2 = buf_get_le32 (iv + 4);
+
+ return 0;
+}
+
+static gcry_err_code_t
+gost_imit_reset (gcry_mac_hd_t h)
+{
+ h->u.imit.n1 = h->u.imit.n2 = 0;
+ h->u.imit.unused = 0;
+ return 0;
+}
+
+static unsigned int
+_gost_imit_block (const u32 *sbox, const u32 *key, u32 *o1, u32 *o2, u32 n1, u32 n2)
+{
+ n1 ^= *o1;
+ n2 ^= *o2;
+
+ n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox);
+ n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox);
+ n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox);
+ n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox);
+
+ n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox);
+ n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox);
+ n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox);
+ n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox);
+
+ *o1 = n1;
+ *o2 = n2;
+
+ return /* burn_stack */ 4*sizeof(void*) /* func call */ +
+ 3*sizeof(void*) /* stack */ +
+ 4*sizeof(void*) /* gost_val call */;
+}
+
+static inline unsigned int
+gost_imit_block (GOST28147_context *ctx, u32 *n1, u32 *n2, const unsigned char *buf)
+{
+ if (ctx->mesh_limit && (ctx->mesh_counter == ctx->mesh_limit))
+ cryptopro_key_meshing (ctx);
+
+ return _gost_imit_block (ctx->sbox, ctx->key,
+ n1, n2,
+ buf_get_le32 (buf+0),
+ buf_get_le32 (buf+4));
+}
+
+static gcry_err_code_t
+gost_imit_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+ const int blocksize = 8;
+ unsigned int burn = 0;
+ if (!buflen || !buf)
+ return GPG_ERR_NO_ERROR;
+
+ if (h->u.imit.unused)
+ {
+ for (; buflen && h->u.imit.unused < blocksize; buflen --)
+ h->u.imit.lastiv[h->u.imit.unused++] = *buf++;
+
+ if (h->u.imit.unused < blocksize)
+ return GPG_ERR_NO_ERROR;
+
+ h->u.imit.count ++;
+ burn = gost_imit_block (&h->u.imit.ctx,
+ &h->u.imit.n1, &h->u.imit.n2,
+ h->u.imit.lastiv);
+
+ h->u.imit.unused = 0;
+ }
+
+ while (buflen >= blocksize)
+ {
+ h->u.imit.count ++;
+ burn = gost_imit_block (&h->u.imit.ctx,
+ &h->u.imit.n1, &h->u.imit.n2,
+ buf);
+ buf += blocksize;
+ buflen -= blocksize;
+ }
+
+ for (; buflen; buflen--)
+ h->u.imit.lastiv[h->u.imit.unused++] = *buf++;
+
+ _gcry_burn_stack (burn);
+
+ return GPG_ERR_NO_ERROR;
+}
+
+static void
+gost_imit_finish (gcry_mac_hd_t h)
+{
+ static const unsigned char zero[8] = {0};
+
+ /* Fill till full block */
+ if (h->u.imit.unused)
+ gost_imit_write(h, zero, 8 - h->u.imit.unused);
+
+ if (h->u.imit.count == 1)
+ gost_imit_write(h, zero, 8);
+}
+
+static gcry_err_code_t
+gost_imit_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen)
+{
+ unsigned int dlen = 8;
+ unsigned char digest[8];
+
+ gost_imit_finish (h);
+
+ buf_put_le32 (digest+0, h->u.imit.n1);
+ buf_put_le32 (digest+4, h->u.imit.n2);
+
+ if (*outlen <= dlen)
+ buf_cpy (outbuf, digest, *outlen);
+ else
+ {
+ buf_cpy (outbuf, digest, dlen);
+ *outlen = dlen;
+ }
+ return 0;
+}
+
+static gcry_err_code_t
+gost_imit_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+ unsigned char tbuf[8];
+
+ gost_imit_finish (h);
+
+ buf_put_le32 (tbuf+0, h->u.imit.n1);
+ buf_put_le32 (tbuf+4, h->u.imit.n2);
+
+ return buf_eq_const(tbuf, buf, buflen) ?
+ GPG_ERR_NO_ERROR : GPG_ERR_CHECKSUM;
+}
+
+static unsigned int
+gost_imit_get_maclen (int algo)
+{
+ (void) algo;
+ return 4; /* or 8 */
+}
+
+
+static unsigned int
+gost_imit_get_keylen (int algo)
+{
+ (void) algo;
+ return 256 / 8;
+}
+
+static gpg_err_code_t
+gost_imit_set_extra_info (gcry_mac_hd_t hd, int what, const void *buffer, size_t buflen)
+{
+ gpg_err_code_t ec = 0;
+
+ (void)buffer;
+ (void)buflen;
+
+ switch (what)
+ {
+ case GCRYCTL_SET_SBOX:
+ ec = gost_set_sbox (&hd->u.imit.ctx, buffer);
+ break;
+
+ default:
+ ec = GPG_ERR_INV_OP;
+ break;
+ }
+ return ec;
+}
+
+
+static gcry_mac_spec_ops_t gost_imit_ops = {
+ gost_imit_open,
+ gost_imit_close,
+ gost_imit_setkey,
+ gost_imit_setiv,
+ gost_imit_reset,
+ gost_imit_write,
+ gost_imit_read,
+ gost_imit_verify,
+ gost_imit_get_maclen,
+ gost_imit_get_keylen,
+ gost_imit_set_extra_info,
+ NULL
+};
+
+gcry_mac_spec_t _gcry_mac_type_spec_gost28147_imit =
+ {
+ GCRY_MAC_GOST28147_IMIT, {0, 0}, "GOST28147_IMIT",
+ &gost_imit_ops
+ };
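For reference, a minimal usage sketch of the GOST28147 IMIT MAC registered above, driven
through the public gcry_mac_* API.  This is illustrative only and not part of the patch;
the key and message are placeholders, error handling is omitted, and the CryptoPro-A
S-box is used by default because gost_imit_setkey falls back to it when no S-box has
been selected.

#include <gcrypt.h>

static void
imit_mac_example (void)
{
  gcry_mac_hd_t hd;
  unsigned char key[32] = { 0 };   /* 256-bit key, as gost_imit_setkey requires.  */
  unsigned char mac[4];
  size_t maclen = sizeof (mac);    /* gost_imit_get_maclen reports 4 bytes.  */

  gcry_mac_open (&hd, GCRY_MAC_GOST28147_IMIT, 0, NULL);
  gcry_mac_setkey (hd, key, sizeof (key));
  gcry_mac_write (hd, "message", 7);
  gcry_mac_read (hd, mac, &maclen);
  gcry_mac_close (hd);
}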
diff --git a/comm/third_party/libgcrypt/cipher/gostr3411-94.c b/comm/third_party/libgcrypt/cipher/gostr3411-94.c
new file mode 100644
index 0000000000..7cf0637e26
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/gostr3411-94.c
@@ -0,0 +1,383 @@
+/* gostr3411-94.c - GOST R 34.11-94 hash function
+ * Copyright (C) 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+#include "gost.h"
+
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+
+typedef struct {
+ gcry_md_block_ctx_t bctx;
+ union {
+ u32 h[8];
+ byte result[32];
+ };
+ u32 sigma[8];
+ u32 len;
+ int cryptopro;
+} GOSTR3411_CONTEXT;
+
+static unsigned int
+transform (void *c, const unsigned char *data, size_t nblks);
+
+static void
+gost3411_init (void *context, unsigned int flags)
+{
+ GOSTR3411_CONTEXT *hd = context;
+
+ (void)flags;
+
+ memset (hd->h, 0, 32);
+ memset (hd->sigma, 0, 32);
+
+ hd->bctx.nblocks = 0;
+ hd->bctx.count = 0;
+ hd->bctx.blocksize_shift = _gcry_ctz(32);
+ hd->bctx.bwrite = transform;
+ hd->cryptopro = 0;
+}
+
+static void
+gost3411_cp_init (void *context, unsigned int flags)
+{
+ GOSTR3411_CONTEXT *hd = context;
+ gost3411_init (context, flags);
+ hd->cryptopro = 1;
+}
+
+static void
+do_p (u32 *p, u32 *u, u32 *v)
+{
+ int k;
+ u32 t[8];
+
+ for (k = 0; k < 8; k++)
+ t[k] = u[k] ^ v[k];
+
+ k = 0;
+ p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 |
+ ((t[2] >> (8*k)) & 0xff) << 8 |
+ ((t[4] >> (8*k)) & 0xff) << 16 |
+ ((t[6] >> (8*k)) & 0xff) << 24;
+ p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 |
+ ((t[3] >> (8*k)) & 0xff) << 8 |
+ ((t[5] >> (8*k)) & 0xff) << 16 |
+ ((t[7] >> (8*k)) & 0xff) << 24;
+
+ k = 1;
+ p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 |
+ ((t[2] >> (8*k)) & 0xff) << 8 |
+ ((t[4] >> (8*k)) & 0xff) << 16 |
+ ((t[6] >> (8*k)) & 0xff) << 24;
+ p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 |
+ ((t[3] >> (8*k)) & 0xff) << 8 |
+ ((t[5] >> (8*k)) & 0xff) << 16 |
+ ((t[7] >> (8*k)) & 0xff) << 24;
+
+ k = 2;
+ p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 |
+ ((t[2] >> (8*k)) & 0xff) << 8 |
+ ((t[4] >> (8*k)) & 0xff) << 16 |
+ ((t[6] >> (8*k)) & 0xff) << 24;
+ p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 |
+ ((t[3] >> (8*k)) & 0xff) << 8 |
+ ((t[5] >> (8*k)) & 0xff) << 16 |
+ ((t[7] >> (8*k)) & 0xff) << 24;
+
+ k = 3;
+ p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 |
+ ((t[2] >> (8*k)) & 0xff) << 8 |
+ ((t[4] >> (8*k)) & 0xff) << 16 |
+ ((t[6] >> (8*k)) & 0xff) << 24;
+ p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 |
+ ((t[3] >> (8*k)) & 0xff) << 8 |
+ ((t[5] >> (8*k)) & 0xff) << 16 |
+ ((t[7] >> (8*k)) & 0xff) << 24;
+}
+
+static void
+do_a (u32 *u)
+{
+ u32 t[2];
+ int i;
+ memcpy(t, u, 2*4);
+ for (i = 0; i < 6; i++)
+ u[i] = u[i+2];
+ u[6] = u[0] ^ t[0];
+ u[7] = u[1] ^ t[1];
+}
+/* apply do_a twice: 1 2 3 4 -> 3 4 1^2 2^3 */
+static void
+do_a2 (u32 *u)
+{
+ u32 t[4];
+ int i;
+ memcpy (t, u, 16);
+ memcpy (u, u + 4, 16);
+ for (i = 0; i < 2; i++)
+ {
+ u[4+i] = t[i] ^ t[i + 2];
+ u[6+i] = u[i] ^ t[i + 2];
+ }
+}
+
+static void
+do_apply_c2 (u32 *u)
+{
+ u[ 0] ^= 0xff00ff00;
+ u[ 1] ^= 0xff00ff00;
+ u[ 2] ^= 0x00ff00ff;
+ u[ 3] ^= 0x00ff00ff;
+ u[ 4] ^= 0x00ffff00;
+ u[ 5] ^= 0xff0000ff;
+ u[ 6] ^= 0x000000ff;
+ u[ 7] ^= 0xff00ffff;
+}
+
+#define do_chi_step12(e) \
+ e[6] ^= ((e[6] >> 16) ^ e[7] ^ (e[7] >> 16) ^ e[4] ^ (e[5] >>16)) & 0xffff;
+
+#define do_chi_step13(e) \
+ e[6] ^= ((e[7] ^ (e[7] >> 16) ^ e[0] ^ (e[4] >> 16) ^ e[6]) & 0xffff) << 16;
+
+#define do_chi_doublestep(e, i) \
+ e[i] ^= (e[i] >> 16) ^ (e[(i+1)%8] << 16) ^ e[(i+1)%8] ^ (e[(i+1)%8] >> 16) ^ (e[(i+2)%8] << 16) ^ e[(i+6)%8] ^ (e[(i+7)%8] >> 16); \
+ e[i] ^= (e[i] << 16);
+
+static void
+do_chi_submix12 (u32 *e, u32 *x)
+{
+ e[6] ^= x[0];
+ e[7] ^= x[1];
+ e[0] ^= x[2];
+ e[1] ^= x[3];
+ e[2] ^= x[4];
+ e[3] ^= x[5];
+ e[4] ^= x[6];
+ e[5] ^= x[7];
+}
+
+static void
+do_chi_submix13 (u32 *e, u32 *x)
+{
+ e[6] ^= (x[0] << 16) | (x[7] >> 16);
+ e[7] ^= (x[1] << 16) | (x[0] >> 16);
+ e[0] ^= (x[2] << 16) | (x[1] >> 16);
+ e[1] ^= (x[3] << 16) | (x[2] >> 16);
+ e[2] ^= (x[4] << 16) | (x[3] >> 16);
+ e[3] ^= (x[5] << 16) | (x[4] >> 16);
+ e[4] ^= (x[6] << 16) | (x[5] >> 16);
+ e[5] ^= (x[7] << 16) | (x[6] >> 16);
+}
+
+static void
+do_add (u32 *s, u32 *a)
+{
+ u32 carry = 0;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ u32 op = carry + a[i];
+ s[i] += op;
+ carry = (a[i] > op) || (op > s[i]);
+ }
+}
+
+static unsigned int
+do_hash_step (GOSTR3411_CONTEXT *hd, u32 *h, u32 *m)
+{
+ u32 u[8], v[8];
+ u32 s[8];
+ u32 k[8];
+ unsigned int burn;
+ int i;
+
+ memcpy (u, h, 32);
+ memcpy (v, m, 32);
+
+ for (i = 0; i < 4; i++) {
+ do_p (k, u, v);
+
+ burn = _gcry_gost_enc_data (k, &s[2*i], &s[2*i+1], h[2*i], h[2*i+1], hd->cryptopro);
+
+ do_a (u);
+ if (i == 1)
+ do_apply_c2 (u);
+ do_a2 (v);
+ }
+
+ for (i = 0; i < 5; i++)
+ {
+ do_chi_doublestep (s, 0);
+ do_chi_doublestep (s, 1);
+ do_chi_doublestep (s, 2);
+ do_chi_doublestep (s, 3);
+ do_chi_doublestep (s, 4);
+ /* That is in total 12 + 1 + 61 = 74 = 16 * 4 + 10 rounds */
+ if (i == 4)
+ break;
+ do_chi_doublestep (s, 5);
+ if (i == 0)
+ do_chi_submix12(s, m);
+ do_chi_step12 (s);
+ if (i == 0)
+ do_chi_submix13(s, h);
+ do_chi_step13 (s);
+ do_chi_doublestep (s, 7);
+ }
+
+ memcpy (h, s+5, 12);
+ memcpy (h+3, s, 20);
+
+ return /* burn_stack */ 4 * sizeof(void*) /* func call (ret addr + args) */ +
+ 4 * 32 + 2 * sizeof(int) /* stack */ +
+ max(burn /* _gcry_gost_enc_one */,
+ sizeof(void*) * 2 /* do_a2 call */ +
+ 16 + sizeof(int) /* do_a2 stack */ );
+}
+
+static unsigned int
+transform_blk (void *ctx, const unsigned char *data)
+{
+ GOSTR3411_CONTEXT *hd = ctx;
+ u32 m[8];
+ unsigned int burn;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ m[i] = buf_get_le32(data + i*4);
+ burn = do_hash_step (hd, hd->h, m);
+ do_add (hd->sigma, m);
+
+ return /* burn_stack */ burn + 3 * sizeof(void*) + 32 + 2 * sizeof(void*);
+}
+
+
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
+{
+ unsigned int burn;
+
+ do
+ {
+ burn = transform_blk (c, data);
+ data += 32;
+ }
+ while (--nblks);
+
+ return burn;
+}
+
+
+/*
+ The routine finally terminates the computation and returns the
+ digest. The handle is prepared for a new cycle, but adding bytes
+   to the handle will destroy the returned buffer.  Returns: 32
+   bytes with the message digest.  */
+static void
+gost3411_final (void *context)
+{
+ GOSTR3411_CONTEXT *hd = context;
+ size_t padlen = 0;
+ u32 l[8];
+ int i;
+ MD_NBLOCKS_TYPE nblocks;
+
+ if (hd->bctx.count > 0)
+ {
+ padlen = 32 - hd->bctx.count;
+ memset (hd->bctx.buf + hd->bctx.count, 0, padlen);
+ hd->bctx.count += padlen;
+ _gcry_md_block_write (hd, NULL, 0); /* flush */;
+ }
+
+ if (hd->bctx.count != 0)
+ return; /* Something went wrong */
+
+ memset (l, 0, 32);
+
+ nblocks = hd->bctx.nblocks;
+ if (padlen)
+ {
+ nblocks --;
+ l[0] = 256 - padlen * 8;
+ }
+ l[0] |= nblocks << 8;
+ nblocks >>= 24;
+
+ for (i = 1; i < 8 && nblocks != 0; i++)
+ {
+ l[i] = nblocks;
+ nblocks >>= 24;
+ }
+
+ do_hash_step (hd, hd->h, l);
+ do_hash_step (hd, hd->h, hd->sigma);
+ for (i = 0; i < 8; i++)
+ hd->h[i] = le_bswap32(hd->h[i]);
+}
+
+static byte *
+gost3411_read (void *context)
+{
+ GOSTR3411_CONTEXT *hd = context;
+
+ return hd->result;
+}
+
+static unsigned char asn[6] = /* Object ID is 1.2.643.2.2.3 */
+ { 0x2a, 0x85, 0x03, 0x02, 0x02, 0x03 };
+
+static gcry_md_oid_spec_t oid_spec_gostr3411[] =
+ {
+ /* iso.member-body.ru.rans.cryptopro.3 (gostR3411-94-with-gostR3410-2001) */
+ { "1.2.643.2.2.3" },
+ /* iso.member-body.ru.rans.cryptopro.9 (gostR3411-94) */
+ { "1.2.643.2.2.9" },
+ {NULL},
+ };
+
+gcry_md_spec_t _gcry_digest_spec_gost3411_94 =
+ {
+ GCRY_MD_GOSTR3411_94, {0, 0},
+ "GOSTR3411_94", NULL, 0, NULL, 32,
+ gost3411_init, _gcry_md_block_write, gost3411_final, gost3411_read, NULL,
+ NULL, NULL,
+ sizeof (GOSTR3411_CONTEXT)
+ };
+gcry_md_spec_t _gcry_digest_spec_gost3411_cp =
+ {
+ GCRY_MD_GOSTR3411_CP, {0, 0},
+ "GOSTR3411_CP", asn, DIM (asn), oid_spec_gostr3411, 32,
+ gost3411_cp_init, _gcry_md_block_write, gost3411_final, gost3411_read, NULL,
+ NULL, NULL,
+ sizeof (GOSTR3411_CONTEXT)
+ };
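As a usage note for the two digest specs above, here is a minimal sketch of computing a
GOST R 34.11-94 hash through the public gcry_md_* API.  Illustrative only, not part of
the patch; error handling is omitted and the input string is a placeholder.

#include <string.h>
#include <gcrypt.h>

static void
gost3411_example (unsigned char digest[32])
{
  gcry_md_hd_t hd;

  gcry_md_open (&hd, GCRY_MD_GOSTR3411_94, 0);
  gcry_md_write (hd, "message digest", 14);
  memcpy (digest, gcry_md_read (hd, GCRY_MD_GOSTR3411_94), 32);
  gcry_md_close (hd);
}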
diff --git a/comm/third_party/libgcrypt/cipher/hash-common.c b/comm/third_party/libgcrypt/cipher/hash-common.c
new file mode 100644
index 0000000000..ed2d7cacd1
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/hash-common.c
@@ -0,0 +1,193 @@
+/* hash-common.c - Common code for hash algorithms
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
+#include "g10lib.h"
+#include "bufhelp.h"
+#include "hash-common.h"
+
+
+/* Run a selftest for hash algorithm ALGO. If the resulting digest
+ matches EXPECT/EXPECTLEN and everything else is fine as well,
+ return NULL. If an error occurs, return a static text string
+ describing the error.
+
+ DATAMODE controls what will be hashed according to this table:
+
+ 0 - Hash the supplied DATA of DATALEN.
+     1 - Hash one million times an 'a'.  DATA and DATALEN are ignored.
+
+*/
+const char *
+_gcry_hash_selftest_check_one (int algo,
+ int datamode, const void *data, size_t datalen,
+ const void *expect, size_t expectlen)
+{
+ const char *result = NULL;
+ gcry_error_t err = 0;
+ gcry_md_hd_t hd;
+ unsigned char *digest;
+ char aaa[1000];
+ int xof = 0;
+
+ if (_gcry_md_get_algo_dlen (algo) == 0)
+ xof = 1;
+ else if (_gcry_md_get_algo_dlen (algo) != expectlen)
+ return "digest size does not match expected size";
+
+ err = _gcry_md_open (&hd, algo, 0);
+ if (err)
+ return "gcry_md_open failed";
+
+ switch (datamode)
+ {
+ case 0:
+ _gcry_md_write (hd, data, datalen);
+ break;
+
+ case 1: /* Hash one million times an "a". */
+ {
+ int i;
+
+ /* Write in odd size chunks so that we test the buffering. */
+ memset (aaa, 'a', 1000);
+ for (i = 0; i < 1000; i++)
+ _gcry_md_write (hd, aaa, 1000);
+ }
+ break;
+
+ default:
+ result = "invalid DATAMODE";
+ }
+
+ if (!result)
+ {
+ if (!xof)
+ {
+ digest = _gcry_md_read (hd, algo);
+
+ if ( memcmp (digest, expect, expectlen) )
+ result = "digest mismatch";
+ }
+ else
+ {
+ gcry_assert(expectlen <= sizeof(aaa));
+
+ err = _gcry_md_extract (hd, algo, aaa, expectlen);
+ if (err)
+ result = "error extracting output from XOF";
+ else if ( memcmp (aaa, expect, expectlen) )
+ result = "digest mismatch";
+ }
+ }
+
+ _gcry_md_close (hd);
+
+ return result;
+}
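To illustrate the DATAMODE table above: a digest module's selftest typically calls this
helper with a known-answer vector.  A hedged sketch with a hypothetical wrapper name,
using the standard FIPS 180 "abc" value for SHA-256 (datamode 0 hashes the supplied
data directly):

static const char *
example_selftest (void)
{
  return _gcry_hash_selftest_check_one
    (GCRY_MD_SHA256, 0, "abc", 3,
     "\xba\x78\x16\xbf\x8f\x01\xcf\xea\x41\x41\x40\xde\x5d\xae\x22\x23"
     "\xb0\x03\x61\xa3\x96\x17\x7a\x9c\xb4\x10\xff\x61\xf2\x00\x15\xad",
     32);
}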
+
+
+/* Common function to write a chunk of data to the transform function
+ of a hash algorithm. Note that the use of the term "block" does
+   not imply a fixed size block.  Note that we explicitly allow using
+ this function after the context has been finalized; the result does
+ not have any meaning but writing after finalize is sometimes
+ helpful to mitigate timing attacks. */
+void
+_gcry_md_block_write (void *context, const void *inbuf_arg, size_t inlen)
+{
+ const unsigned char *inbuf = inbuf_arg;
+ gcry_md_block_ctx_t *hd = context;
+ unsigned int stack_burn = 0;
+ unsigned int nburn;
+ const unsigned int blocksize_shift = hd->blocksize_shift;
+ const unsigned int blocksize = 1 << blocksize_shift;
+ size_t inblocks;
+ size_t copylen;
+
+ if (sizeof(hd->buf) < blocksize)
+ BUG();
+
+ if (!hd->bwrite)
+ return;
+
+ if (hd->count > blocksize)
+ {
+ /* This happens only when gcry_md_write is called after final.
+ * Writing after final is used for mitigating timing attacks. */
+ hd->count = 0;
+ }
+
+ while (hd->count)
+ {
+ if (hd->count == blocksize) /* Flush the buffer. */
+ {
+ nburn = hd->bwrite (hd, hd->buf, 1);
+ stack_burn = nburn > stack_burn ? nburn : stack_burn;
+ hd->count = 0;
+ if (!++hd->nblocks)
+ hd->nblocks_high++;
+ }
+ else
+ {
+ copylen = inlen;
+ if (copylen > blocksize - hd->count)
+ copylen = blocksize - hd->count;
+
+ if (copylen == 0)
+ break;
+
+ buf_cpy (&hd->buf[hd->count], inbuf, copylen);
+ hd->count += copylen;
+ inbuf += copylen;
+ inlen -= copylen;
+ }
+ }
+
+ if (inlen == 0)
+ return;
+
+ if (inlen >= blocksize)
+ {
+ inblocks = inlen >> blocksize_shift;
+ nburn = hd->bwrite (hd, inbuf, inblocks);
+ stack_burn = nburn > stack_burn ? nburn : stack_burn;
+ hd->count = 0;
+ hd->nblocks_high += (hd->nblocks + inblocks < inblocks);
+ hd->nblocks += inblocks;
+ inlen -= inblocks << blocksize_shift;
+ inbuf += inblocks << blocksize_shift;
+ }
+
+ if (inlen)
+ {
+ buf_cpy (hd->buf, inbuf, inlen);
+ hd->count = inlen;
+ }
+
+ if (stack_burn > 0)
+ _gcry_burn_stack (stack_burn);
+}
diff --git a/comm/third_party/libgcrypt/cipher/hash-common.h b/comm/third_party/libgcrypt/cipher/hash-common.h
new file mode 100644
index 0000000000..561e77a7e5
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/hash-common.h
@@ -0,0 +1,62 @@
+/* hash-common.h - Declarations of common code for hash algorithms.
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_HASH_COMMON_H
+#define GCRY_HASH_COMMON_H
+
+#include "types.h"
+
+
+const char * _gcry_hash_selftest_check_one
+/**/ (int algo,
+ int datamode, const void *data, size_t datalen,
+ const void *expect, size_t expectlen);
+
+/* Type for the md_write helper function. */
+typedef unsigned int (*_gcry_md_block_write_t) (void *c,
+ const unsigned char *blks,
+ size_t nblks);
+
+#if (defined(USE_SHA512) || defined(USE_WHIRLPOOL))
+/* SHA-512 and Whirlpool need u64.  SHA-512 needs a larger buffer. */
+# define MD_BLOCK_MAX_BLOCKSIZE 128
+# define MD_NBLOCKS_TYPE u64
+#else
+# define MD_BLOCK_MAX_BLOCKSIZE 64
+# define MD_NBLOCKS_TYPE u32
+#endif
+
+/* SHA1 needs 2x64 bytes and SHA-512 needs 128 bytes. */
+#define MD_BLOCK_CTX_BUFFER_SIZE 128
+
+typedef struct gcry_md_block_ctx
+{
+ byte buf[MD_BLOCK_CTX_BUFFER_SIZE];
+ MD_NBLOCKS_TYPE nblocks;
+ MD_NBLOCKS_TYPE nblocks_high;
+ int count;
+ unsigned int blocksize_shift;
+ _gcry_md_block_write_t bwrite;
+} gcry_md_block_ctx_t;
+
+
+void
+_gcry_md_block_write( void *context, const void *inbuf_arg, size_t inlen);
+
+#endif /*GCRY_HASH_COMMON_H*/
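The intended usage pattern for gcry_md_block_ctx_t mirrors what gostr3411-94.c does
above: the module embeds the block context as its first member, points bwrite at its
own transform function and forwards writes to _gcry_md_block_write.  A hedged sketch
with hypothetical names, assuming the usual module includes (string.h, g10lib.h,
hash-common.h):

typedef struct
{
  gcry_md_block_ctx_t bctx;   /* Must be the first member.  */
  u32 state[8];               /* Module specific chaining state.  */
} EXAMPLE_CONTEXT;

static unsigned int example_transform (void *c, const unsigned char *blks,
                                       size_t nblks);

static void
example_init (void *context, unsigned int flags)
{
  EXAMPLE_CONTEXT *hd = context;

  (void)flags;

  memset (hd->state, 0, sizeof hd->state);
  hd->bctx.nblocks = 0;
  hd->bctx.nblocks_high = 0;
  hd->bctx.count = 0;
  hd->bctx.blocksize_shift = _gcry_ctz (64);  /* 64-byte blocks.  */
  hd->bctx.bwrite = example_transform;
}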
diff --git a/comm/third_party/libgcrypt/cipher/idea.c b/comm/third_party/libgcrypt/cipher/idea.c
new file mode 100644
index 0000000000..0a81081810
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/idea.c
@@ -0,0 +1,382 @@
+/* idea.c - IDEA function
+ * Copyright 1997, 1998, 1999, 2001 Werner Koch (dd9jn)
+ * Copyright 2013 g10 Code GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * WERNER KOCH BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of Werner Koch shall not be
+ * used in advertising or otherwise to promote the sale, use or other dealings
+ * in this Software without prior written authorization from Werner Koch.
+ *
+ * Patents on IDEA have expired:
+ * Europe: EP0482154 on 2011-05-16,
+ * Japan: JP3225440 on 2011-05-16,
+ * U.S.: 5,214,703 on 2012-01-07.
+ */
+
+/*
+ * Please see http://www.noepatents.org/ to learn why software patents
+ * are bad for society and what you can do to fight them.
+ *
+ * The code herein is based on the one from:
+ * Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996.
+ * ISBN 0-471-11709-9.
+ */
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "types.h" /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "cipher-internal.h"
+
+
+#define IDEA_KEYSIZE 16
+#define IDEA_BLOCKSIZE 8
+#define IDEA_ROUNDS 8
+#define IDEA_KEYLEN (6*IDEA_ROUNDS+4)
+
+typedef struct {
+ u16 ek[IDEA_KEYLEN];
+ u16 dk[IDEA_KEYLEN];
+ int have_dk;
+} IDEA_context;
+
+static const char *selftest(void);
+
+
+static u16
+mul_inv( u16 x )
+{
+ u16 t0, t1;
+ u16 q, y;
+
+ if( x < 2 )
+ return x;
+ t1 = 0x10001UL / x;
+ y = 0x10001UL % x;
+ if( y == 1 )
+ return (1-t1) & 0xffff;
+
+ t0 = 1;
+ do {
+ q = x / y;
+ x = x % y;
+ t0 += q * t1;
+ if( x == 1 )
+ return t0;
+ q = y / x;
+ y = y % x;
+ t1 += q * t0;
+ } while( y != 1 );
+ return (1-t1) & 0xffff;
+}
+
+
+
+static void
+expand_key( const byte *userkey, u16 *ek )
+{
+ int i,j;
+
+ for(j=0; j < 8; j++ ) {
+ ek[j] = (*userkey << 8) + userkey[1];
+ userkey += 2;
+ }
+ for(i=0; j < IDEA_KEYLEN; j++ ) {
+ i++;
+ ek[i+7] = ek[i&7] << 9 | ek[(i+1)&7] >> 7;
+ ek += i & 8;
+ i &= 7;
+ }
+}
+
+
+static void
+invert_key( u16 *ek, u16 dk[IDEA_KEYLEN] )
+{
+ int i;
+ u16 t1, t2, t3;
+ u16 temp[IDEA_KEYLEN];
+ u16 *p = temp + IDEA_KEYLEN;
+
+ t1 = mul_inv( *ek++ );
+ t2 = -*ek++;
+ t3 = -*ek++;
+ *--p = mul_inv( *ek++ );
+ *--p = t3;
+ *--p = t2;
+ *--p = t1;
+
+ for(i=0; i < IDEA_ROUNDS-1; i++ ) {
+ t1 = *ek++;
+ *--p = *ek++;
+ *--p = t1;
+
+ t1 = mul_inv( *ek++ );
+ t2 = -*ek++;
+ t3 = -*ek++;
+ *--p = mul_inv( *ek++ );
+ *--p = t2;
+ *--p = t3;
+ *--p = t1;
+ }
+ t1 = *ek++;
+ *--p = *ek++;
+ *--p = t1;
+
+ t1 = mul_inv( *ek++ );
+ t2 = -*ek++;
+ t3 = -*ek++;
+ *--p = mul_inv( *ek++ );
+ *--p = t3;
+ *--p = t2;
+ *--p = t1;
+ memcpy(dk, temp, sizeof(temp) );
+ wipememory(temp, sizeof(temp));
+}
+
+
+static void
+cipher( byte *outbuf, const byte *inbuf, u16 *key )
+{
+ u16 s2, s3;
+ u16 in[4];
+ int r = IDEA_ROUNDS;
+#define x1 (in[0])
+#define x2 (in[1])
+#define x3 (in[2])
+#define x4 (in[3])
+#define MUL(x,y) \
+ do {u16 _t16; u32 _t32; \
+ if( (_t16 = (y)) ) { \
+ if( (x = (x)&0xffff) ) { \
+ _t32 = (u32)x * _t16; \
+ x = _t32 & 0xffff; \
+ _t16 = _t32 >> 16; \
+ x = ((x)-_t16) + (x<_t16?1:0); \
+ } \
+ else { \
+ x = 1 - _t16; \
+ } \
+ } \
+ else { \
+ x = 1 - x; \
+ } \
+ } while(0)
+
+ memcpy (in, inbuf, sizeof in);
+#ifndef WORDS_BIGENDIAN
+ x1 = (x1>>8) | (x1<<8);
+ x2 = (x2>>8) | (x2<<8);
+ x3 = (x3>>8) | (x3<<8);
+ x4 = (x4>>8) | (x4<<8);
+#endif
+ do {
+ MUL(x1, *key++);
+ x2 += *key++;
+ x3 += *key++;
+ MUL(x4, *key++ );
+
+ s3 = x3;
+ x3 ^= x1;
+ MUL(x3, *key++);
+ s2 = x2;
+ x2 ^=x4;
+ x2 += x3;
+ MUL(x2, *key++);
+ x3 += x2;
+
+ x1 ^= x2;
+ x4 ^= x3;
+
+ x2 ^= s3;
+ x3 ^= s2;
+ } while( --r );
+ MUL(x1, *key++);
+ x3 += *key++;
+ x2 += *key++;
+ MUL(x4, *key);
+
+#ifndef WORDS_BIGENDIAN
+ x1 = (x1>>8) | (x1<<8);
+ x2 = (x2>>8) | (x2<<8);
+ x3 = (x3>>8) | (x3<<8);
+ x4 = (x4>>8) | (x4<<8);
+#endif
+ memcpy (outbuf+0, &x1, 2);
+ memcpy (outbuf+2, &x3, 2);
+ memcpy (outbuf+4, &x2, 2);
+ memcpy (outbuf+6, &x4, 2);
+#undef MUL
+#undef x1
+#undef x2
+#undef x3
+#undef x4
+}
+
+
+static int
+do_setkey( IDEA_context *c, const byte *key, unsigned int keylen )
+{
+ static int initialized = 0;
+ static const char *selftest_failed = 0;
+
+ if( !initialized ) {
+ initialized = 1;
+ selftest_failed = selftest();
+ if( selftest_failed )
+ log_error( "%s\n", selftest_failed );
+ }
+ if( selftest_failed )
+ return GPG_ERR_SELFTEST_FAILED;
+
+ assert(keylen == 16);
+ c->have_dk = 0;
+ expand_key( key, c->ek );
+ invert_key( c->ek, c->dk );
+ return 0;
+}
+
+static gcry_err_code_t
+idea_setkey (void *context, const byte *key, unsigned int keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ IDEA_context *ctx = context;
+ int rc = do_setkey (ctx, key, keylen);
+ (void)bulk_ops;
+ _gcry_burn_stack (23+6*sizeof(void*));
+ return rc;
+}
+
+static void
+encrypt_block( IDEA_context *c, byte *outbuf, const byte *inbuf )
+{
+ cipher( outbuf, inbuf, c->ek );
+}
+
+static unsigned int
+idea_encrypt (void *context, byte *out, const byte *in)
+{
+ IDEA_context *ctx = context;
+ encrypt_block (ctx, out, in);
+ return /*burn_stack*/ (24+3*sizeof (void*));
+}
+
+static void
+decrypt_block( IDEA_context *c, byte *outbuf, const byte *inbuf )
+{
+ if( !c->have_dk ) {
+ c->have_dk = 1;
+ invert_key( c->ek, c->dk );
+ }
+ cipher( outbuf, inbuf, c->dk );
+}
+
+static unsigned int
+idea_decrypt (void *context, byte *out, const byte *in)
+{
+ IDEA_context *ctx = context;
+ decrypt_block (ctx, out, in);
+ return /*burn_stack*/ (24+3*sizeof (void*));
+}
+
+
+static const char *
+selftest( void )
+{
+static struct {
+ byte key[16];
+ byte plain[8];
+ byte cipher[8];
+} test_vectors[] = {
+ { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04,
+ 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 },
+ { 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03 },
+ { 0x11, 0xFB, 0xED, 0x2B, 0x01, 0x98, 0x6D, 0xE5 } },
+ { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04,
+ 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 },
+ { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 },
+ { 0x54, 0x0E, 0x5F, 0xEA, 0x18, 0xC2, 0xF8, 0xB1 } },
+ { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04,
+ 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 },
+ { 0x00, 0x19, 0x32, 0x4B, 0x64, 0x7D, 0x96, 0xAF },
+ { 0x9F, 0x0A, 0x0A, 0xB6, 0xE1, 0x0C, 0xED, 0x78 } },
+ { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04,
+ 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 },
+ { 0xF5, 0x20, 0x2D, 0x5B, 0x9C, 0x67, 0x1B, 0x08 },
+ { 0xCF, 0x18, 0xFD, 0x73, 0x55, 0xE2, 0xC5, 0xC5 } },
+ { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04,
+ 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 },
+ { 0xFA, 0xE6, 0xD2, 0xBE, 0xAA, 0x96, 0x82, 0x6E },
+ { 0x85, 0xDF, 0x52, 0x00, 0x56, 0x08, 0x19, 0x3D } },
+ { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04,
+ 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 },
+ { 0x0A, 0x14, 0x1E, 0x28, 0x32, 0x3C, 0x46, 0x50 },
+ { 0x2F, 0x7D, 0xE7, 0x50, 0x21, 0x2F, 0xB7, 0x34 } },
+ { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04,
+ 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 },
+ { 0x05, 0x0A, 0x0F, 0x14, 0x19, 0x1E, 0x23, 0x28 },
+ { 0x7B, 0x73, 0x14, 0x92, 0x5D, 0xE5, 0x9C, 0x09 } },
+ { { 0x00, 0x05, 0x00, 0x0A, 0x00, 0x0F, 0x00, 0x14,
+ 0x00, 0x19, 0x00, 0x1E, 0x00, 0x23, 0x00, 0x28 },
+ { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 },
+ { 0x3E, 0xC0, 0x47, 0x80, 0xBE, 0xFF, 0x6E, 0x20 } },
+ { { 0x3A, 0x98, 0x4E, 0x20, 0x00, 0x19, 0x5D, 0xB3,
+ 0x2E, 0xE5, 0x01, 0xC8, 0xC4, 0x7C, 0xEA, 0x60 },
+ { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 },
+ { 0x97, 0xBC, 0xD8, 0x20, 0x07, 0x80, 0xDA, 0x86 } },
+ { { 0x00, 0x64, 0x00, 0xC8, 0x01, 0x2C, 0x01, 0x90,
+ 0x01, 0xF4, 0x02, 0x58, 0x02, 0xBC, 0x03, 0x20 },
+ { 0x05, 0x32, 0x0A, 0x64, 0x14, 0xC8, 0x19, 0xFA },
+ { 0x65, 0xBE, 0x87, 0xE7, 0xA2, 0x53, 0x8A, 0xED } },
+ { { 0x9D, 0x40, 0x75, 0xC1, 0x03, 0xBC, 0x32, 0x2A,
+ 0xFB, 0x03, 0xE7, 0xBE, 0x6A, 0xB3, 0x00, 0x06 },
+ { 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 },
+ { 0xF5, 0xDB, 0x1A, 0xC4, 0x5E, 0x5E, 0xF9, 0xF9 } }
+};
+ IDEA_context c;
+ byte buffer[8];
+ int i;
+
+ for(i=0; i < DIM(test_vectors); i++ ) {
+ do_setkey( &c, test_vectors[i].key, 16 );
+ encrypt_block( &c, buffer, test_vectors[i].plain );
+ if( memcmp( buffer, test_vectors[i].cipher, 8 ) )
+ return "IDEA test encryption failed.";
+ decrypt_block( &c, buffer, test_vectors[i].cipher );
+ if( memcmp( buffer, test_vectors[i].plain, 8 ) )
+ return "IDEA test decryption failed.";
+ }
+
+ return NULL;
+}
+
+
+gcry_cipher_spec_t _gcry_cipher_spec_idea =
+ {
+ GCRY_CIPHER_IDEA, {0, 0},
+ "IDEA", NULL, NULL, IDEA_BLOCKSIZE, 128,
+ sizeof (IDEA_context),
+ idea_setkey, idea_encrypt, idea_decrypt
+ };
diff --git a/comm/third_party/libgcrypt/cipher/kdf-internal.h b/comm/third_party/libgcrypt/cipher/kdf-internal.h
new file mode 100644
index 0000000000..7079860e99
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/kdf-internal.h
@@ -0,0 +1,40 @@
+/* kdf-internal.h - Internal defs for kdf.c
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_KDF_INTERNAL_H
+#define GCRY_KDF_INTERNAL_H
+
+/*-- kdf.c --*/
+gpg_err_code_t
+_gcry_kdf_pkdf2 (const void *passphrase, size_t passphraselen,
+ int hashalgo,
+ const void *salt, size_t saltlen,
+ unsigned long iterations,
+ size_t keysize, void *keybuffer);
+
+/*-- scrypt.c --*/
+gcry_err_code_t
+_gcry_kdf_scrypt (const unsigned char *passwd, size_t passwdlen,
+ int algo, int subalgo,
+ const unsigned char *salt, size_t saltlen,
+ unsigned long iterations,
+ size_t dklen, unsigned char *dk);
+
+
+#endif /*GCRY_KDF_INTERNAL_H*/
diff --git a/comm/third_party/libgcrypt/cipher/kdf.c b/comm/third_party/libgcrypt/cipher/kdf.c
new file mode 100644
index 0000000000..93c2c9f65e
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/kdf.c
@@ -0,0 +1,503 @@
+/* kdf.c - Key Derivation Functions
+ * Copyright (C) 1998, 2008, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "kdf-internal.h"
+
+
+/* Transform a passphrase into a suitable key of length KEYSIZE and
+ store this key in the caller provided buffer KEYBUFFER. The caller
+   must provide a HASHALGO, a valid ALGO and, depending on that algo, a
+ SALT of 8 bytes and the number of ITERATIONS. Code taken from
+ gnupg/agent/protect.c:hash_passphrase. */
+static gpg_err_code_t
+openpgp_s2k (const void *passphrase, size_t passphraselen,
+ int algo, int hashalgo,
+ const void *salt, size_t saltlen,
+ unsigned long iterations,
+ size_t keysize, void *keybuffer)
+{
+ gpg_err_code_t ec;
+ gcry_md_hd_t md;
+ char *key = keybuffer;
+ int pass, i;
+ int used = 0;
+ int secmode;
+
+ if ((algo == GCRY_KDF_SALTED_S2K || algo == GCRY_KDF_ITERSALTED_S2K)
+ && (!salt || saltlen != 8))
+ return GPG_ERR_INV_VALUE;
+
+ secmode = _gcry_is_secure (passphrase) || _gcry_is_secure (keybuffer);
+
+ ec = _gcry_md_open (&md, hashalgo, secmode? GCRY_MD_FLAG_SECURE : 0);
+ if (ec)
+ return ec;
+
+ for (pass=0; used < keysize; pass++)
+ {
+ if (pass)
+ {
+ _gcry_md_reset (md);
+ for (i=0; i < pass; i++) /* Preset the hash context. */
+ _gcry_md_putc (md, 0);
+ }
+
+ if (algo == GCRY_KDF_SALTED_S2K || algo == GCRY_KDF_ITERSALTED_S2K)
+ {
+ int len2 = passphraselen + 8;
+ unsigned long count = len2;
+
+ if (algo == GCRY_KDF_ITERSALTED_S2K)
+ {
+ count = iterations;
+ if (count < len2)
+ count = len2;
+ }
+
+ while (count > len2)
+ {
+ _gcry_md_write (md, salt, saltlen);
+ _gcry_md_write (md, passphrase, passphraselen);
+ count -= len2;
+ }
+ if (count < saltlen)
+ _gcry_md_write (md, salt, count);
+ else
+ {
+ _gcry_md_write (md, salt, saltlen);
+ count -= saltlen;
+ _gcry_md_write (md, passphrase, count);
+ }
+ }
+ else
+ _gcry_md_write (md, passphrase, passphraselen);
+
+ _gcry_md_final (md);
+ i = _gcry_md_get_algo_dlen (hashalgo);
+ if (i > keysize - used)
+ i = keysize - used;
+ memcpy (key+used, _gcry_md_read (md, hashalgo), i);
+ used += i;
+ }
+ _gcry_md_close (md);
+ return 0;
+}
+
+
+/* Transform a passphrase into a suitable key of length KEYSIZE and
+ store this key in the caller provided buffer KEYBUFFER. The caller
+ must provide PRFALGO which indicates the pseudorandom function to
+   use: This shall be the algorithm id of a hash algorithm; it is
+ used in HMAC mode. SALT is a salt of length SALTLEN and ITERATIONS
+ gives the number of iterations. */
+gpg_err_code_t
+_gcry_kdf_pkdf2 (const void *passphrase, size_t passphraselen,
+ int hashalgo,
+ const void *salt, size_t saltlen,
+ unsigned long iterations,
+ size_t keysize, void *keybuffer)
+{
+ gpg_err_code_t ec;
+ gcry_md_hd_t md;
+ int secmode;
+ unsigned long dklen = keysize;
+ char *dk = keybuffer;
+ unsigned int hlen; /* Output length of the digest function. */
+ unsigned int l; /* Rounded up number of blocks. */
+ unsigned int r; /* Number of octets in the last block. */
+ char *sbuf; /* Malloced buffer to concatenate salt and iter
+ as well as space to hold TBUF and UBUF. */
+ char *tbuf; /* Buffer for T; ptr into SBUF, size is HLEN. */
+ char *ubuf; /* Buffer for U; ptr into SBUF, size is HLEN. */
+ unsigned int lidx; /* Current block number. */
+ unsigned long iter; /* Current iteration number. */
+ unsigned int i;
+
+ /* We allow for a saltlen of 0 here to support scrypt. It is not
+     clear whether rfc2898 allows for this, thus we do a test on
+ saltlen > 0 only in gcry_kdf_derive. */
+ if (!salt || !iterations || !dklen)
+ return GPG_ERR_INV_VALUE;
+
+ hlen = _gcry_md_get_algo_dlen (hashalgo);
+ if (!hlen)
+ return GPG_ERR_DIGEST_ALGO;
+
+ secmode = _gcry_is_secure (passphrase) || _gcry_is_secure (keybuffer);
+
+ /* Step 1 */
+ /* If dkLen > (2^32 - 1) * hLen, output "derived key too long" and
+ * stop. We use a stronger inequality but only if our type can hold
+ * a larger value. */
+
+#if SIZEOF_UNSIGNED_LONG > 4
+ if (dklen > 0xffffffffU)
+ return GPG_ERR_INV_VALUE;
+#endif
+
+
+ /* Step 2 */
+ l = ((dklen - 1)/ hlen) + 1;
+ r = dklen - (l - 1) * hlen;
+
+ /* Setup buffers and prepare a hash context. */
+ sbuf = (secmode
+ ? xtrymalloc_secure (saltlen + 4 + hlen + hlen)
+ : xtrymalloc (saltlen + 4 + hlen + hlen));
+ if (!sbuf)
+ return gpg_err_code_from_syserror ();
+ tbuf = sbuf + saltlen + 4;
+ ubuf = tbuf + hlen;
+
+ ec = _gcry_md_open (&md, hashalgo, (GCRY_MD_FLAG_HMAC
+ | (secmode?GCRY_MD_FLAG_SECURE:0)));
+ if (ec)
+ {
+ xfree (sbuf);
+ return ec;
+ }
+
+ ec = _gcry_md_setkey (md, passphrase, passphraselen);
+ if (ec)
+ {
+ _gcry_md_close (md);
+ xfree (sbuf);
+ return ec;
+ }
+
+ /* Step 3 and 4. */
+ memcpy (sbuf, salt, saltlen);
+ for (lidx = 1; lidx <= l; lidx++)
+ {
+ for (iter = 0; iter < iterations; iter++)
+ {
+ _gcry_md_reset (md);
+ if (!iter) /* Compute U_1: */
+ {
+ sbuf[saltlen] = (lidx >> 24);
+ sbuf[saltlen + 1] = (lidx >> 16);
+ sbuf[saltlen + 2] = (lidx >> 8);
+ sbuf[saltlen + 3] = lidx;
+ _gcry_md_write (md, sbuf, saltlen + 4);
+ memcpy (ubuf, _gcry_md_read (md, 0), hlen);
+ memcpy (tbuf, ubuf, hlen);
+ }
+ else /* Compute U_(2..c): */
+ {
+ _gcry_md_write (md, ubuf, hlen);
+ memcpy (ubuf, _gcry_md_read (md, 0), hlen);
+ for (i=0; i < hlen; i++)
+ tbuf[i] ^= ubuf[i];
+ }
+ }
+ if (lidx == l) /* Last block. */
+ memcpy (dk, tbuf, r);
+ else
+ {
+ memcpy (dk, tbuf, hlen);
+ dk += hlen;
+ }
+ }
+
+ _gcry_md_close (md);
+ xfree (sbuf);
+ return 0;
+}
+
+
+/* Derive a key from a passphrase. KEYSIZE gives the requested size
+ of the keys in octets. KEYBUFFER is a caller provided buffer
+ filled on success with the derived key. The input passphrase is
+ taken from (PASSPHRASE,PASSPHRASELEN) which is an arbitrary memory
+ buffer. ALGO specifies the KDF algorithm to use; these are the
+ constants GCRY_KDF_*. SUBALGO specifies an algorithm used
+ internally by the KDF algorithms; this is usually a hash algorithm
+   but certain KDF algorithms may use it differently.  {SALT,SALTLEN}
+ is a salt as needed by most KDF algorithms. ITERATIONS is a
+ positive integer parameter to most KDFs. 0 is returned on success,
+ or an error code on failure. */
+gpg_err_code_t
+_gcry_kdf_derive (const void *passphrase, size_t passphraselen,
+ int algo, int subalgo,
+ const void *salt, size_t saltlen,
+ unsigned long iterations,
+ size_t keysize, void *keybuffer)
+{
+ gpg_err_code_t ec;
+
+ if (!passphrase)
+ {
+ ec = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ if (!keybuffer || !keysize)
+ {
+ ec = GPG_ERR_INV_VALUE;
+ goto leave;
+ }
+
+
+ switch (algo)
+ {
+ case GCRY_KDF_SIMPLE_S2K:
+ case GCRY_KDF_SALTED_S2K:
+ case GCRY_KDF_ITERSALTED_S2K:
+ if (!passphraselen)
+ ec = GPG_ERR_INV_DATA;
+ else
+ ec = openpgp_s2k (passphrase, passphraselen, algo, subalgo,
+ salt, saltlen, iterations, keysize, keybuffer);
+ break;
+
+ case GCRY_KDF_PBKDF1:
+ ec = GPG_ERR_UNSUPPORTED_ALGORITHM;
+ break;
+
+ case GCRY_KDF_PBKDF2:
+ if (!saltlen)
+ ec = GPG_ERR_INV_VALUE;
+ else
+ ec = _gcry_kdf_pkdf2 (passphrase, passphraselen, subalgo,
+ salt, saltlen, iterations, keysize, keybuffer);
+ break;
+
+ case 41:
+ case GCRY_KDF_SCRYPT:
+#if USE_SCRYPT
+ ec = _gcry_kdf_scrypt (passphrase, passphraselen, algo, subalgo,
+ salt, saltlen, iterations, keysize, keybuffer);
+#else
+ ec = GPG_ERR_UNSUPPORTED_ALGORITHM;
+#endif /*USE_SCRYPT*/
+ break;
+
+ default:
+ ec = GPG_ERR_UNKNOWN_ALGORITHM;
+ break;
+ }
+
+ leave:
+ return ec;
+}
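A minimal caller-side sketch of this API for the PBKDF2 case, using the first RFC 6070
SHA-1 vector that also appears in the selftest below ("password"/"salt", 1 iteration,
20-octet key).  Illustrative only, not part of the patch.

#include <gcrypt.h>

static gpg_error_t
pbkdf2_example (unsigned char key[20])
{
  /* Expected output: 0c60c80f961f0e71f3a9b524af6012062fe037a6.  */
  return gcry_kdf_derive ("password", 8, GCRY_KDF_PBKDF2, GCRY_MD_SHA1,
                          "salt", 4, 1, 20, key);
}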
+
+
+/* Check one KDF call with ALGO and HASH_ALGO using the regular KDF
+ * API.  (passphrase,passphraselen) is the password from which the key
+ * is derived, (salt,saltlen) is the salt for the key derivation,
+ * iterations is the number of KDF iterations,
+ * and (expect,expectlen) the expected result. Returns NULL on
+ * success or a string describing the failure. */
+
+static const char *
+check_one (int algo, int hash_algo,
+ const void *passphrase, size_t passphraselen,
+ const void *salt, size_t saltlen,
+ unsigned long iterations,
+ const void *expect, size_t expectlen)
+{
+ unsigned char key[512]; /* hardcoded to avoid allocation */
+ size_t keysize = expectlen;
+
+ if (keysize > sizeof(key))
+ return "invalid tests data";
+
+ if (_gcry_kdf_derive (passphrase, passphraselen, algo,
+ hash_algo, salt, saltlen, iterations,
+ keysize, key))
+ return "gcry_kdf_derive failed";
+
+ if (memcmp (key, expect, expectlen))
+ return "does not match";
+
+ return NULL;
+}
+
+
+static gpg_err_code_t
+selftest_pbkdf2 (int extended, selftest_report_func_t report)
+{
+ static const struct {
+ const char *desc;
+ const char *p; /* Passphrase. */
+ size_t plen; /* Length of P. */
+ const char *salt;
+ size_t saltlen;
+ int hashalgo;
+ unsigned long c; /* Iterations. */
+ int dklen; /* Requested key length. */
+ const char *dk; /* Derived key. */
+ int disabled;
+ } tv[] = {
+#if USE_SHA1
+#define NUM_TEST_VECTORS 9
+ /* SHA1 test vectors are from RFC-6070. */
+ {
+ "Basic PBKDF2 SHA1 #1",
+ "password", 8,
+ "salt", 4,
+ GCRY_MD_SHA1,
+ 1,
+ 20,
+ "\x0c\x60\xc8\x0f\x96\x1f\x0e\x71\xf3\xa9"
+ "\xb5\x24\xaf\x60\x12\x06\x2f\xe0\x37\xa6"
+ },
+ {
+ "Basic PBKDF2 SHA1 #2",
+ "password", 8,
+ "salt", 4,
+ GCRY_MD_SHA1,
+ 2,
+ 20,
+ "\xea\x6c\x01\x4d\xc7\x2d\x6f\x8c\xcd\x1e"
+ "\xd9\x2a\xce\x1d\x41\xf0\xd8\xde\x89\x57"
+ },
+ {
+ "Basic PBKDF2 SHA1 #3",
+ "password", 8,
+ "salt", 4,
+ GCRY_MD_SHA1,
+ 4096,
+ 20,
+ "\x4b\x00\x79\x01\xb7\x65\x48\x9a\xbe\xad"
+ "\x49\xd9\x26\xf7\x21\xd0\x65\xa4\x29\xc1"
+ },
+ {
+ "Basic PBKDF2 SHA1 #4",
+ "password", 8,
+ "salt", 4,
+ GCRY_MD_SHA1,
+ 16777216,
+ 20,
+ "\xee\xfe\x3d\x61\xcd\x4d\xa4\xe4\xe9\x94"
+ "\x5b\x3d\x6b\xa2\x15\x8c\x26\x34\xe9\x84",
+ 1 /* This test takes too long. */
+ },
+ {
+ "Basic PBKDF2 SHA1 #5",
+ "passwordPASSWORDpassword", 24,
+ "saltSALTsaltSALTsaltSALTsaltSALTsalt", 36,
+ GCRY_MD_SHA1,
+ 4096,
+ 25,
+ "\x3d\x2e\xec\x4f\xe4\x1c\x84\x9b\x80\xc8"
+ "\xd8\x36\x62\xc0\xe4\x4a\x8b\x29\x1a\x96"
+ "\x4c\xf2\xf0\x70\x38"
+ },
+ {
+ "Basic PBKDF2 SHA1 #6",
+ "pass\0word", 9,
+ "sa\0lt", 5,
+ GCRY_MD_SHA1,
+ 4096,
+ 16,
+ "\x56\xfa\x6a\xa7\x55\x48\x09\x9d\xcc\x37"
+ "\xd7\xf0\x34\x25\xe0\xc3"
+ },
+ { /* empty password test, not in RFC-6070 */
+ "Basic PBKDF2 SHA1 #7",
+ "", 0,
+ "salt", 4,
+ GCRY_MD_SHA1,
+ 2,
+ 20,
+ "\x13\x3a\x4c\xe8\x37\xb4\xd2\x52\x1e\xe2"
+ "\xbf\x03\xe1\x1c\x71\xca\x79\x4e\x07\x97"
+ },
+#else
+#define NUM_TEST_VECTORS 2
+#endif
+ {
+ "Basic PBKDF2 SHA256",
+ "password", 8,
+ "salt", 4,
+ GCRY_MD_SHA256,
+ 2,
+ 32,
+ "\xae\x4d\x0c\x95\xaf\x6b\x46\xd3\x2d\x0a\xdf\xf9\x28\xf0\x6d\xd0"
+ "\x2a\x30\x3f\x8e\xf3\xc2\x51\xdf\xd6\xe2\xd8\x5a\x95\x47\x4c\x43"
+ },
+ {
+ "Extended PBKDF2 SHA256",
+ "passwordPASSWORDpassword", 24,
+ "saltSALTsaltSALTsaltSALTsaltSALTsalt", 36,
+ GCRY_MD_SHA256,
+ 4096,
+ 40,
+ "\x34\x8c\x89\xdb\xcb\xd3\x2b\x2f\x32\xd8\x14\xb8\x11\x6e\x84\xcf"
+ "\x2b\x17\x34\x7e\xbc\x18\x00\x18\x1c\x4e\x2a\x1f\xb8\xdd\x53\xe1"
+ "\xc6\x35\x51\x8c\x7d\xac\x47\xe9"
+ },
+ { NULL }
+ };
+ const char *what;
+ const char *errtxt;
+ int tvidx;
+
+ for (tvidx=0; tv[tvidx].desc; tvidx++)
+ {
+ what = tv[tvidx].desc;
+ if (tv[tvidx].disabled)
+ continue;
+ errtxt = check_one (GCRY_KDF_PBKDF2, tv[tvidx].hashalgo,
+ tv[tvidx].p, tv[tvidx].plen,
+ tv[tvidx].salt, tv[tvidx].saltlen,
+ tv[tvidx].c,
+ tv[tvidx].dk, tv[tvidx].dklen);
+ if (errtxt)
+ goto failed;
+ if (tvidx >= NUM_TEST_VECTORS - 1 && !extended)
+ break;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("kdf", GCRY_KDF_PBKDF2, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Run the selftests for the KDF algorithm ALGO with the optional
+   reporting function REPORT.  */
+gpg_error_t
+_gcry_kdf_selftest (int algo, int extended, selftest_report_func_t report)
+{
+ gcry_err_code_t ec = 0;
+
+ if (algo == GCRY_KDF_PBKDF2)
+ ec = selftest_pbkdf2 (extended, report);
+ else
+ {
+ ec = GPG_ERR_UNSUPPORTED_ALGORITHM;
+ if (report)
+ report ("kdf", algo, "module", "algorithm not available");
+ }
+ return gpg_error (ec);
+}
diff --git a/comm/third_party/libgcrypt/cipher/keccak-armv7-neon.S b/comm/third_party/libgcrypt/cipher/keccak-armv7-neon.S
new file mode 100644
index 0000000000..0bec8d50a9
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/keccak-armv7-neon.S
@@ -0,0 +1,945 @@
+/* keccak-armv7-neon.S - ARMv7/NEON implementation of Keccak
+ *
+ * Copyright (C) 2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_NEON)
+
+/* Based on public-domain/CC0 implementation from SUPERCOP package
+ * (keccakc1024/inplace-armv7a-neon/keccak2.s)
+ *
+ * Original copyright header follows:
+ */
+
+@ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+@ Michaël Peeters and Gilles Van Assche. For more information, feedback or
+@ questions, please refer to our website: http://keccak.noekeon.org/
+@
+@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+@
+@ To the extent possible under law, the implementer has waived all copyright
+@ and related or neighboring rights to the source code in this file.
+@ http://creativecommons.org/publicdomain/zero/1.0/
+
+.text
+
+.syntax unified
+.fpu neon
+.arm
+
+
+.extern _gcry_keccak_round_consts_64bit;
+
+#ifdef __PIC__
+# define GET_DATA_POINTER(reg, name, rtmp) \
+ ldr reg, 1f; \
+ ldr rtmp, 2f; \
+ b 3f; \
+ 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+ 2: .word name(GOT); \
+ 3: add reg, pc, reg; \
+ ldr reg, [reg, rtmp];
+#else
+# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+
+@// --- offsets in state
+.equ Aba, 0*8
+.equ Aga, 1*8
+.equ Aka, 2*8
+.equ Ama, 3*8
+.equ Asa, 4*8
+
+@// --- macros
+
+.macro KeccakThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5
+
+ @Prepare Theta
+ @Ca = Aba^Aga^Aka^Ama^Asa@
+ @Ce = Abe^Age^Ake^Ame^Ase@
+ @Ci = Abi^Agi^Aki^Ami^Asi@
+ @Co = Abo^Ago^Ako^Amo^Aso@
+ @Cu = Abu^Agu^Aku^Amu^Asu@
+ @De = Ca^ROL64(Ci, 1)@
+ @Di = Ce^ROL64(Co, 1)@
+ @Do = Ci^ROL64(Cu, 1)@
+ @Du = Co^ROL64(Ca, 1)@
+ @Da = Cu^ROL64(Ce, 1)@
+
+ veor.64 q4, q6, q7
+ veor.64 q5, q9, q10
+ veor.64 d8, d8, d9
+ veor.64 d10, d10, d11
+ veor.64 d1, d8, d16
+ veor.64 d2, d10, d17
+
+ veor.64 q4, q11, q12
+ veor.64 q5, q14, q15
+ veor.64 d8, d8, d9
+ veor.64 d10, d10, d11
+ veor.64 d3, d8, d26
+
+ vadd.u64 q4, q1, q1
+ veor.64 d4, d10, d27
+ vmov.64 d0, d5
+ vsri.64 q4, q1, #63
+
+ vadd.u64 q5, q2, q2
+ veor.64 q4, q4, q0
+ vsri.64 q5, q2, #63
+ vadd.u64 d7, d1, d1
+ veor.64 \argA2, \argA2, d8
+ veor.64 q5, q5, q1
+
+ vsri.64 d7, d1, #63
+ vshl.u64 d1, \argA2, #44
+ veor.64 \argA3, \argA3, d9
+ veor.64 d7, d7, d4
+
+ @Ba = argA1^Da@
+ @Be = ROL64((argA2^De), 44)@
+ @Bi = ROL64((argA3^Di), 43)@
+ @Bo = ROL64((argA4^Do), 21)@
+ @Bu = ROL64((argA5^Du), 14)@
+ @argA2 = Be ^((~Bi)& Bo )@
+ @argA3 = Bi ^((~Bo)& Bu )@
+ @argA4 = Bo ^((~Bu)& Ba )@
+ @argA5 = Bu ^((~Ba)& Be )@
+ @argA1 = Ba ^((~Be)& Bi )@ argA1 ^= KeccakF1600RoundConstants[i+round]@
+ vsri.64 d1, \argA2, #64-44
+ vshl.u64 d2, \argA3, #43
+ vldr.64 d0, [sp, #\argA1]
+ veor.64 \argA4, \argA4, d10
+ vsri.64 d2, \argA3, #64-43
+ vshl.u64 d3, \argA4, #21
+ veor.64 \argA5, \argA5, d11
+ veor.64 d0, d0, d7
+ vsri.64 d3, \argA4, #64-21
+ vbic.64 d5, d2, d1
+ vshl.u64 d4, \argA5, #14
+ vbic.64 \argA2, d3, d2
+ vld1.64 d6, [ip]!
+ veor.64 d5, d0
+ vsri.64 d4, \argA5, #64-14
+ veor.64 d5, d6
+ vbic.64 \argA5, d1, d0
+ vbic.64 \argA3, d4, d3
+ vbic.64 \argA4, d0, d4
+ veor.64 \argA2, d1
+ vstr.64 d5, [sp, #\argA1]
+ veor.64 \argA3, d2
+ veor.64 \argA4, d3
+ veor.64 \argA5, d4
+
+ .endm
+
+.macro KeccakThetaRhoPiChi1 argA1, argA2, argA3, argA4, argA5
+
+ @d2 = ROL64((argA1^Da), 3)@
+ @d3 = ROL64((argA2^De), 45)@
+ @d4 = ROL64((argA3^Di), 61)@
+ @d0 = ROL64((argA4^Do), 28)@
+ @d1 = ROL64((argA5^Du), 20)@
+ @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@
+ @argA2 = Be ^((~Bi)& Bo )@
+ @argA3 = Bi ^((~Bo)& Bu )@
+ @argA4 = Bo ^((~Bu)& Ba )@
+ @argA5 = Bu ^((~Ba)& Be )@
+
+ veor.64 \argA2, \argA2, d8
+ veor.64 \argA3, \argA3, d9
+ vshl.u64 d3, \argA2, #45
+ vldr.64 d6, [sp, #\argA1]
+ vshl.u64 d4, \argA3, #61
+ veor.64 \argA4, \argA4, d10
+ vsri.64 d3, \argA2, #64-45
+ veor.64 \argA5, \argA5, d11
+ vsri.64 d4, \argA3, #64-61
+ vshl.u64 d0, \argA4, #28
+ veor.64 d6, d6, d7
+ vshl.u64 d1, \argA5, #20
+ vbic.64 \argA3, d4, d3
+ vsri.64 d0, \argA4, #64-28
+ vbic.64 \argA4, d0, d4
+ vshl.u64 d2, d6, #3
+ vsri.64 d1, \argA5, #64-20
+ veor.64 \argA4, d3
+ vsri.64 d2, d6, #64-3
+ vbic.64 \argA5, d1, d0
+ vbic.64 d6, d2, d1
+ vbic.64 \argA2, d3, d2
+ veor.64 d6, d0
+ veor.64 \argA2, d1
+ vstr.64 d6, [sp, #\argA1]
+ veor.64 \argA3, d2
+ veor.64 d5, d6
+ veor.64 \argA5, d4
+
+ .endm
+
+.macro KeccakThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5
+
+ @d4 = ROL64((argA1^Da), 18)@
+ @d0 = ROL64((argA2^De), 1)@
+ @d1 = ROL64((argA3^Di), 6)@
+ @d2 = ROL64((argA4^Do), 25)@
+ @d3 = ROL64((argA5^Du), 8)@
+ @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@
+ @argA2 = Be ^((~Bi)& Bo )@
+ @argA3 = Bi ^((~Bo)& Bu )@
+ @argA4 = Bo ^((~Bu)& Ba )@
+ @argA5 = Bu ^((~Ba)& Be )@
+
+ veor.64 \argA3, \argA3, d9
+ veor.64 \argA4, \argA4, d10
+ vshl.u64 d1, \argA3, #6
+ vldr.64 d6, [sp, #\argA1]
+ vshl.u64 d2, \argA4, #25
+ veor.64 \argA5, \argA5, d11
+ vsri.64 d1, \argA3, #64-6
+ veor.64 \argA2, \argA2, d8
+ vsri.64 d2, \argA4, #64-25
+ vext.8 d3, \argA5, \argA5, #7
+ veor.64 d6, d6, d7
+ vbic.64 \argA3, d2, d1
+ vadd.u64 d0, \argA2, \argA2
+ vbic.64 \argA4, d3, d2
+ vsri.64 d0, \argA2, #64-1
+ vshl.u64 d4, d6, #18
+ veor.64 \argA2, d1, \argA4
+ veor.64 \argA3, d0
+ vsri.64 d4, d6, #64-18
+ vstr.64 \argA3, [sp, #\argA1]
+ veor.64 d5, \argA3
+ vbic.64 \argA5, d1, d0
+ vbic.64 \argA3, d4, d3
+ vbic.64 \argA4, d0, d4
+ veor.64 \argA3, d2
+ veor.64 \argA4, d3
+ veor.64 \argA5, d4
+
+ .endm
+
+.macro KeccakThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5
+
+ @d1 = ROL64((argA1^Da), 36)@
+ @d2 = ROL64((argA2^De), 10)@
+ @d3 = ROL64((argA3^Di), 15)@
+ @d4 = ROL64((argA4^Do), 56)@
+ @d0 = ROL64((argA5^Du), 27)@
+ @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@
+ @argA2 = Be ^((~Bi)& Bo )@
+ @argA3 = Bi ^((~Bo)& Bu )@
+ @argA4 = Bo ^((~Bu)& Ba )@
+ @argA5 = Bu ^((~Ba)& Be )@
+
+ veor.64 \argA2, \argA2, d8
+ veor.64 \argA3, \argA3, d9
+ vshl.u64 d2, \argA2, #10
+ vldr.64 d6, [sp, #\argA1]
+ vshl.u64 d3, \argA3, #15
+ veor.64 \argA4, \argA4, d10
+ vsri.64 d2, \argA2, #64-10
+ vsri.64 d3, \argA3, #64-15
+ veor.64 \argA5, \argA5, d11
+ vext.8 d4, \argA4, \argA4, #1
+ vbic.64 \argA2, d3, d2
+ vshl.u64 d0, \argA5, #27
+ veor.64 d6, d6, d7
+ vbic.64 \argA3, d4, d3
+ vsri.64 d0, \argA5, #64-27
+ vshl.u64 d1, d6, #36
+ veor.64 \argA3, d2
+ vbic.64 \argA4, d0, d4
+ vsri.64 d1, d6, #64-36
+
+ veor.64 \argA4, d3
+ vbic.64 d6, d2, d1
+ vbic.64 \argA5, d1, d0
+ veor.64 d6, d0
+ veor.64 \argA2, d1
+ vstr.64 d6, [sp, #\argA1]
+ veor.64 d5, d6
+ veor.64 \argA5, d4
+
+ .endm
+
+.macro KeccakThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5
+
+ @d3 = ROL64((argA1^Da), 41)@
+ @d4 = ROL64((argA2^De), 2)@
+ @d0 = ROL64((argA3^Di), 62)@
+ @d1 = ROL64((argA4^Do), 55)@
+ @d2 = ROL64((argA5^Du), 39)@
+ @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@
+ @argA2 = Be ^((~Bi)& Bo )@
+ @argA3 = Bi ^((~Bo)& Bu )@
+ @argA4 = Bo ^((~Bu)& Ba )@
+ @argA5 = Bu ^((~Ba)& Be )@
+
+ veor.64 \argA2, \argA2, d8
+ veor.64 \argA3, \argA3, d9
+ vshl.u64 d4, \argA2, #2
+ veor.64 \argA5, \argA5, d11
+ vshl.u64 d0, \argA3, #62
+ vldr.64 d6, [sp, #\argA1]
+ vsri.64 d4, \argA2, #64-2
+ veor.64 \argA4, \argA4, d10
+ vsri.64 d0, \argA3, #64-62
+
+ vshl.u64 d1, \argA4, #55
+ veor.64 d6, d6, d7
+ vshl.u64 d2, \argA5, #39
+ vsri.64 d1, \argA4, #64-55
+ vbic.64 \argA4, d0, d4
+ vsri.64 d2, \argA5, #64-39
+ vbic.64 \argA2, d1, d0
+ vshl.u64 d3, d6, #41
+ veor.64 \argA5, d4, \argA2
+ vbic.64 \argA2, d2, d1
+ vsri.64 d3, d6, #64-41
+ veor.64 d6, d0, \argA2
+
+ vbic.64 \argA2, d3, d2
+ vbic.64 \argA3, d4, d3
+ veor.64 \argA2, d1
+ vstr.64 d6, [sp, #\argA1]
+ veor.64 d5, d6
+ veor.64 \argA3, d2
+ veor.64 \argA4, d3
+
+ .endm
+
+
+@// --- code
+
+@not callable from C!
+.p2align 3
+.type KeccakF_armv7a_neon_asm,%function;
+KeccakF_armv7a_neon_asm: @
+
+.LroundLoop:
+
+ KeccakThetaRhoPiChiIota Aba, d13, d19, d25, d31
+ KeccakThetaRhoPiChi1 Aka, d15, d21, d22, d28
+ KeccakThetaRhoPiChi2 Asa, d12, d18, d24, d30
+ KeccakThetaRhoPiChi3 Aga, d14, d20, d26, d27
+ KeccakThetaRhoPiChi4 Ama, d16, d17, d23, d29
+
+ KeccakThetaRhoPiChiIota Aba, d15, d18, d26, d29
+ KeccakThetaRhoPiChi1 Asa, d14, d17, d25, d28
+ KeccakThetaRhoPiChi2 Ama, d13, d21, d24, d27
+ KeccakThetaRhoPiChi3 Aka, d12, d20, d23, d31
+ KeccakThetaRhoPiChi4 Aga, d16, d19, d22, d30
+
+ KeccakThetaRhoPiChiIota Aba, d14, d21, d23, d30
+ KeccakThetaRhoPiChi1 Ama, d12, d19, d26, d28
+ KeccakThetaRhoPiChi2 Aga, d15, d17, d24, d31
+ KeccakThetaRhoPiChi3 Asa, d13, d20, d22, d29
+ KeccakThetaRhoPiChi4 Aka, d16, d18, d25, d27
+
+ KeccakThetaRhoPiChiIota Aba, d12, d17, d22, d27
+ KeccakThetaRhoPiChi1 Aga, d13, d18, d23, d28
+ KeccakThetaRhoPiChi2 Aka, d14, d19, d24, d29
+ ldr r0, [ip]
+ KeccakThetaRhoPiChi3 Ama, d15, d20, d25, d30
+ cmp r0, #0xFFFFFFFF
+ KeccakThetaRhoPiChi4 Asa, d16, d21, d26, d31
+
+ bne .LroundLoop
+ sub ip, #(8*24)
+ bx lr
+.p2align 2
+.ltorg
+.size KeccakF_armv7a_neon_asm,.-KeccakF_armv7a_neon_asm;
+
+
+@//unsigned _gcry_keccak_permute_armv7_neon(u64 *state) callable from C
+.p2align 3
+.global _gcry_keccak_permute_armv7_neon
+.type _gcry_keccak_permute_armv7_neon,%function;
+_gcry_keccak_permute_armv7_neon:
+
+ push {ip, lr}
+ vpush {q4-q7}
+ sub sp,sp, #5*8
+
+ vldr.64 d0, [r0, #0*8]
+ vldr.64 d12, [r0, #1*8]
+ vldr.64 d17, [r0, #2*8]
+ vldr.64 d22, [r0, #3*8]
+ vldr.64 d27, [r0, #4*8]
+
+ GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr);
+
+ vldr.64 d1, [r0, #5*8]
+ vldr.64 d13, [r0, #6*8]
+ vldr.64 d18, [r0, #7*8]
+ vldr.64 d23, [r0, #8*8]
+ vldr.64 d28, [r0, #9*8]
+
+ vldr.64 d2, [r0, #10*8]
+ vldr.64 d14, [r0, #11*8]
+ vldr.64 d19, [r0, #12*8]
+ vldr.64 d24, [r0, #13*8]
+ vldr.64 d29, [r0, #14*8]
+
+ vldr.64 d3, [r0, #15*8]
+ vldr.64 d15, [r0, #16*8]
+ vldr.64 d20, [r0, #17*8]
+ vldr.64 d25, [r0, #18*8]
+ vldr.64 d30, [r0, #19*8]
+
+ vldr.64 d4, [r0, #20*8]
+ vldr.64 d16, [r0, #21*8]
+ vldr.64 d21, [r0, #22*8]
+ vldr.64 d26, [r0, #23*8]
+ vldr.64 d31, [r0, #24*8]
+
+ vstr.64 d0, [sp, #Aba]
+ vstr.64 d1, [sp, #Aga]
+ veor.64 q0, q0, q1
+ vstr.64 d2, [sp, #Aka]
+ veor.64 d5, d0, d1
+ vstr.64 d3, [sp, #Ama]
+ mov r1, r0
+ vstr.64 d4, [sp, #Asa]
+ veor.64 d5, d5, d4
+
+ bl KeccakF_armv7a_neon_asm
+
+ vpop.64 { d0- d4 }
+
+ vstr.64 d0, [r1, #0*8]
+ vstr.64 d12, [r1, #1*8]
+ vstr.64 d17, [r1, #2*8]
+ vstr.64 d22, [r1, #3*8]
+ vstr.64 d27, [r1, #4*8]
+
+ vstr.64 d1, [r1, #5*8]
+ vstr.64 d13, [r1, #6*8]
+ vstr.64 d18, [r1, #7*8]
+ vstr.64 d23, [r1, #8*8]
+ vstr.64 d28, [r1, #9*8]
+
+ vstr.64 d2, [r1, #10*8]
+ vstr.64 d14, [r1, #11*8]
+ vstr.64 d19, [r1, #12*8]
+ vstr.64 d24, [r1, #13*8]
+ vstr.64 d29, [r1, #14*8]
+
+ vstr.64 d3, [r1, #15*8]
+ vstr.64 d15, [r1, #16*8]
+ vstr.64 d20, [r1, #17*8]
+ vstr.64 d25, [r1, #18*8]
+ vstr.64 d30, [r1, #19*8]
+
+ vstr.64 d4, [r1, #20*8]
+ vstr.64 d16, [r1, #21*8]
+ vstr.64 d21, [r1, #22*8]
+ vstr.64 d26, [r1, #23*8]
+ vstr.64 d31, [r1, #24*8]
+
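+	@ return the stack burn depth that the C wrapper passes to _gcry_burn_stack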
+ mov r0, #112
+ vpop {q4-q7}
+ pop {ip, pc}
+.p2align 2
+.ltorg
+.size _gcry_keccak_permute_armv7_neon,.-_gcry_keccak_permute_armv7_neon;
+
+@//unsigned _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, @r4
+@ int pos, @r1
+@ const byte *lanes, @r2
+@ unsigned int nlanes, @r3
+@ int blocklanes) @ r5 callable from C
+.p2align 3
+.global _gcry_keccak_absorb_lanes64_armv7_neon
+.type _gcry_keccak_absorb_lanes64_armv7_neon,%function;
+_gcry_keccak_absorb_lanes64_armv7_neon:
+
+ cmp r3, #0 @ nlanes == 0
+ itt eq
+ moveq r0, #0
+ bxeq lr
+
+ push {r4-r5, ip, lr}
+ beq .Lout
+ mov r4, r0
+ ldr r5, [sp, #(4*4)]
+ vpush {q4-q7}
+
+ @ load state
+ vldr.64 d0, [r4, #0*8]
+ vldr.64 d12, [r4, #1*8]
+ vldr.64 d17, [r4, #2*8]
+ vldr.64 d22, [r4, #3*8]
+ vldr.64 d27, [r4, #4*8]
+
+ GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr);
+
+ vldr.64 d1, [r4, #5*8]
+ vldr.64 d13, [r4, #6*8]
+ vldr.64 d18, [r4, #7*8]
+ vldr.64 d23, [r4, #8*8]
+ vldr.64 d28, [r4, #9*8]
+
+ vldr.64 d2, [r4, #10*8]
+ vldr.64 d14, [r4, #11*8]
+ vldr.64 d19, [r4, #12*8]
+ vldr.64 d24, [r4, #13*8]
+ vldr.64 d29, [r4, #14*8]
+
+ vldr.64 d3, [r4, #15*8]
+ vldr.64 d15, [r4, #16*8]
+ vldr.64 d20, [r4, #17*8]
+ vldr.64 d25, [r4, #18*8]
+ vldr.64 d30, [r4, #19*8]
+
+ vldr.64 d4, [r4, #20*8]
+ vldr.64 d16, [r4, #21*8]
+ vldr.64 d21, [r4, #22*8]
+ vldr.64 d26, [r4, #23*8]
+ vldr.64 d31, [r4, #24*8]
+
+.Lmain_loop:
+
+ @ detect absorb mode (full blocks vs lanes)
+
+ cmp r1, #0 @ pos != 0
+ bne .Llanes_loop
+
+.Lmain_loop_pos0:
+
+ @ full blocks mode
+
+ @ switch (blocksize)
+ cmp r5, #21
+ beq .Lfull_block_21
+ cmp r5, #18
+ beq .Lfull_block_18
+ cmp r5, #17
+ beq .Lfull_block_17
+ cmp r5, #13
+ beq .Lfull_block_13
+ cmp r5, #9
+ beq .Lfull_block_9
+
+ @ unknown blocksize
+ b .Llanes_loop
+
+.Lfull_block_21:
+
+ @ SHAKE128
+
+ cmp r3, #21 @ nlanes < blocklanes
+ blo .Llanes_loop
+
+ sub sp,sp, #5*8
+
+ vld1.64 {d5-d8}, [r2]!
+ veor d0, d5
+ vld1.64 {d9-d11}, [r2]!
+ veor d12, d6
+ veor d17, d7
+ veor d22, d8
+ vld1.64 {d5-d8}, [r2]!
+ veor d27, d9
+
+ veor d1, d10
+ veor d13, d11
+ vld1.64 {d9-d11}, [r2]!
+ veor d18, d5
+ veor d23, d6
+ veor d28, d7
+
+ veor d2, d8
+ vld1.64 {d5-d8}, [r2]!
+ veor d14, d9
+ veor d19, d10
+ veor d24, d11
+ vld1.64 {d9-d11}, [r2]!
+ veor d29, d5
+
+ veor d3, d6
+ veor d15, d7
+ veor d20, d8
+ veor d25, d9
+ veor d30, d10
+
+ veor d4, d11
+
+ vstr.64 d0, [sp, #Aba]
+ vstr.64 d1, [sp, #Aga]
+ veor.64 q0, q0, q1
+ vstr.64 d2, [sp, #Aka]
+ veor.64 d5, d0, d1
+ vstr.64 d3, [sp, #Ama]
+ vstr.64 d4, [sp, #Asa]
+ veor.64 d5, d5, d4
+
+ bl KeccakF_armv7a_neon_asm
+
+ subs r3, #21 @ nlanes -= 21
+ vpop.64 { d0-d4 }
+
+ beq .Ldone
+
+ b .Lfull_block_21
+
+.Lfull_block_18:
+
+ @ SHA3-224
+
+ cmp r3, #18 @ nlanes < blocklanes
+ blo .Llanes_loop
+
+ sub sp,sp, #5*8
+
+ vld1.64 {d5-d8}, [r2]!
+ veor d0, d5
+ vld1.64 {d9-d11}, [r2]!
+ veor d12, d6
+ veor d17, d7
+ veor d22, d8
+ vld1.64 {d5-d8}, [r2]!
+ veor d27, d9
+
+ veor d1, d10
+ veor d13, d11
+ vld1.64 {d9-d11}, [r2]!
+ veor d18, d5
+ veor d23, d6
+ veor d28, d7
+
+ veor d2, d8
+ vld1.64 {d5-d8}, [r2]!
+ veor d14, d9
+ veor d19, d10
+ veor d24, d11
+ veor d29, d5
+
+ veor d3, d6
+ veor d15, d7
+ veor d20, d8
+
+ vstr.64 d0, [sp, #Aba]
+ vstr.64 d1, [sp, #Aga]
+ veor.64 q0, q0, q1
+ vstr.64 d2, [sp, #Aka]
+ veor.64 d5, d0, d1
+ vstr.64 d3, [sp, #Ama]
+ vstr.64 d4, [sp, #Asa]
+ veor.64 d5, d5, d4
+
+ bl KeccakF_armv7a_neon_asm
+
+ subs r3, #18 @ nlanes -= 18
+ vpop.64 { d0-d4 }
+
+ beq .Ldone
+
+ b .Lfull_block_18
+
+.Lfull_block_17:
+
+ @ SHA3-256 & SHAKE256
+
+ cmp r3, #17 @ nlanes < blocklanes
+ blo .Llanes_loop
+
+ sub sp,sp, #5*8
+
+ vld1.64 {d5-d8}, [r2]!
+ veor d0, d5
+ vld1.64 {d9-d11}, [r2]!
+ veor d12, d6
+ veor d17, d7
+ veor d22, d8
+ vld1.64 {d5-d8}, [r2]!
+ veor d27, d9
+
+ veor d1, d10
+ veor d13, d11
+ vld1.64 {d9-d11}, [r2]!
+ veor d18, d5
+ veor d23, d6
+ veor d28, d7
+
+ veor d2, d8
+ vld1.64 {d5-d7}, [r2]!
+ veor d14, d9
+ veor d19, d10
+ veor d24, d11
+ veor d29, d5
+
+ veor d3, d6
+ veor d15, d7
+
+ vstr.64 d0, [sp, #Aba]
+ vstr.64 d1, [sp, #Aga]
+ veor.64 q0, q0, q1
+ vstr.64 d2, [sp, #Aka]
+ veor.64 d5, d0, d1
+ vstr.64 d3, [sp, #Ama]
+ vstr.64 d4, [sp, #Asa]
+ veor.64 d5, d5, d4
+
+ bl KeccakF_armv7a_neon_asm
+
+ subs r3, #17 @ nlanes -= 17
+ vpop.64 { d0-d4 }
+
+ beq .Ldone
+
+ b .Lfull_block_17
+
+.Lfull_block_13:
+
+ @ SHA3-384
+
+ cmp r3, #13 @ nlanes < blocklanes
+ blo .Llanes_loop
+
+ sub sp,sp, #5*8
+
+ vld1.64 {d5-d8}, [r2]!
+ veor d0, d5
+ vld1.64 {d9-d11}, [r2]!
+ veor d12, d6
+ veor d17, d7
+ veor d22, d8
+ vld1.64 {d5-d8}, [r2]!
+ veor d27, d9
+
+ veor d1, d10
+ veor d13, d11
+ vld1.64 {d9-d10}, [r2]!
+ veor d18, d5
+ veor d23, d6
+ veor d28, d7
+
+ veor d2, d8
+ veor d14, d9
+ veor d19, d10
+
+ vstr.64 d0, [sp, #Aba]
+ vstr.64 d1, [sp, #Aga]
+ veor.64 q0, q0, q1
+ vstr.64 d2, [sp, #Aka]
+ veor.64 d5, d0, d1
+ vstr.64 d3, [sp, #Ama]
+ vstr.64 d4, [sp, #Asa]
+ veor.64 d5, d5, d4
+
+ bl KeccakF_armv7a_neon_asm
+
+ subs r3, #13 @ nlanes -= 13
+ vpop.64 { d0-d4 }
+
+ beq .Ldone
+
+ b .Lfull_block_13
+
+.Lfull_block_9:
+
+ @ SHA3-512
+
+ cmp r3, #9 @ nlanes < blocklanes
+ blo .Llanes_loop
+
+ sub sp,sp, #5*8
+
+ vld1.64 {d5-d8}, [r2]!
+ veor d0, d5
+ vld1.64 {d9-d11}, [r2]!
+ veor d12, d6
+ veor d17, d7
+ veor d22, d8
+ vld1.64 {d5-d6}, [r2]!
+ veor d27, d9
+
+ veor d1, d10
+ veor d13, d11
+ veor d18, d5
+ veor d23, d6
+
+ vstr.64 d0, [sp, #Aba]
+ vstr.64 d1, [sp, #Aga]
+ veor.64 q0, q0, q1
+ vstr.64 d2, [sp, #Aka]
+ veor.64 d5, d0, d1
+ vstr.64 d3, [sp, #Ama]
+ vstr.64 d4, [sp, #Asa]
+ veor.64 d5, d5, d4
+
+ bl KeccakF_armv7a_neon_asm
+
+ subs r3, #9 @ nlanes -= 9
+ vpop.64 { d0-d4 }
+
+ beq .Ldone
+
+ b .Lfull_block_9
+
+.Llanes_loop:
+
+ @ per-lane mode
+
+ @ switch (pos)
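+	@ load the byte offset for lane 'pos' from the table below and add it,
+	@ scaled by 4, to pc to branch to the matching .LlaneN handler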
+ ldrb r0, [pc, r1]
+ add pc, pc, r0, lsl #2
+.Lswitch_table:
+ .byte (.Llane0-.Lswitch_table-4)/4
+ .byte (.Llane1-.Lswitch_table-4)/4
+ .byte (.Llane2-.Lswitch_table-4)/4
+ .byte (.Llane3-.Lswitch_table-4)/4
+ .byte (.Llane4-.Lswitch_table-4)/4
+ .byte (.Llane5-.Lswitch_table-4)/4
+ .byte (.Llane6-.Lswitch_table-4)/4
+ .byte (.Llane7-.Lswitch_table-4)/4
+ .byte (.Llane8-.Lswitch_table-4)/4
+ .byte (.Llane9-.Lswitch_table-4)/4
+ .byte (.Llane10-.Lswitch_table-4)/4
+ .byte (.Llane11-.Lswitch_table-4)/4
+ .byte (.Llane12-.Lswitch_table-4)/4
+ .byte (.Llane13-.Lswitch_table-4)/4
+ .byte (.Llane14-.Lswitch_table-4)/4
+ .byte (.Llane15-.Lswitch_table-4)/4
+ .byte (.Llane16-.Lswitch_table-4)/4
+ .byte (.Llane17-.Lswitch_table-4)/4
+ .byte (.Llane18-.Lswitch_table-4)/4
+ .byte (.Llane19-.Lswitch_table-4)/4
+ .byte (.Llane20-.Lswitch_table-4)/4
+ .byte (.Llane21-.Lswitch_table-4)/4
+ .byte (.Llane22-.Lswitch_table-4)/4
+ .byte (.Llane23-.Lswitch_table-4)/4
+ .byte (.Llane24-.Lswitch_table-4)/4
+.p2align 2
+
+#define ABSORB_LANE(label, vreg) \
+ label: \
+ add r1, #1; \
+ vld1.64 d5, [r2]!; \
+ cmp r1, r5; /* pos == blocklanes */ \
+ veor vreg, vreg, d5; \
+ beq .Llanes_permute; \
+ subs r3, #1; \
+ beq .Ldone;
+
+ ABSORB_LANE(.Llane0, d0)
+ ABSORB_LANE(.Llane1, d12)
+ ABSORB_LANE(.Llane2, d17)
+ ABSORB_LANE(.Llane3, d22)
+ ABSORB_LANE(.Llane4, d27)
+
+ ABSORB_LANE(.Llane5, d1)
+ ABSORB_LANE(.Llane6, d13)
+ ABSORB_LANE(.Llane7, d18)
+ ABSORB_LANE(.Llane8, d23)
+ ABSORB_LANE(.Llane9, d28)
+
+ ABSORB_LANE(.Llane10, d2)
+ ABSORB_LANE(.Llane11, d14)
+ ABSORB_LANE(.Llane12, d19)
+ ABSORB_LANE(.Llane13, d24)
+ ABSORB_LANE(.Llane14, d29)
+
+ ABSORB_LANE(.Llane15, d3)
+ ABSORB_LANE(.Llane16, d15)
+ ABSORB_LANE(.Llane17, d20)
+ ABSORB_LANE(.Llane18, d25)
+ ABSORB_LANE(.Llane19, d30)
+
+ ABSORB_LANE(.Llane20, d4)
+ ABSORB_LANE(.Llane21, d16)
+ ABSORB_LANE(.Llane22, d21)
+ ABSORB_LANE(.Llane23, d26)
+ ABSORB_LANE(.Llane24, d31)
+
+ b .Llanes_loop
+
+.Llanes_permute:
+
+ sub sp,sp, #5*8
+ vstr.64 d0, [sp, #Aba]
+ vstr.64 d1, [sp, #Aga]
+ veor.64 q0, q0, q1
+ vstr.64 d2, [sp, #Aka]
+ veor.64 d5, d0, d1
+ vstr.64 d3, [sp, #Ama]
+ vstr.64 d4, [sp, #Asa]
+ veor.64 d5, d5, d4
+
+ bl KeccakF_armv7a_neon_asm
+
+ mov r1, #0 @ pos <= 0
+ subs r3, #1
+
+ vpop.64 { d0-d4 }
+
+ beq .Ldone
+
+ b .Lmain_loop_pos0
+
+.Ldone:
+
+ @ save state
+ vstr.64 d0, [r4, #0*8]
+ vstr.64 d12, [r4, #1*8]
+ vstr.64 d17, [r4, #2*8]
+ vstr.64 d22, [r4, #3*8]
+ vstr.64 d27, [r4, #4*8]
+
+ vstr.64 d1, [r4, #5*8]
+ vstr.64 d13, [r4, #6*8]
+ vstr.64 d18, [r4, #7*8]
+ vstr.64 d23, [r4, #8*8]
+ vstr.64 d28, [r4, #9*8]
+
+ vstr.64 d2, [r4, #10*8]
+ vstr.64 d14, [r4, #11*8]
+ vstr.64 d19, [r4, #12*8]
+ vstr.64 d24, [r4, #13*8]
+ vstr.64 d29, [r4, #14*8]
+
+ vstr.64 d3, [r4, #15*8]
+ vstr.64 d15, [r4, #16*8]
+ vstr.64 d20, [r4, #17*8]
+ vstr.64 d25, [r4, #18*8]
+ vstr.64 d30, [r4, #19*8]
+
+ vstr.64 d4, [r4, #20*8]
+ vstr.64 d16, [r4, #21*8]
+ vstr.64 d21, [r4, #22*8]
+ vstr.64 d26, [r4, #23*8]
+ vstr.64 d31, [r4, #24*8]
+
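+	@ return the stack burn depth that the C wrapper passes to _gcry_burn_stack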
+ mov r0, #120
+ vpop {q4-q7}
+.Lout:
+ pop {r4-r5, ip, pc}
+.p2align 2
+.ltorg
+.size _gcry_keccak_absorb_lanes64_armv7_neon,.-_gcry_keccak_absorb_lanes64_armv7_neon;
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/keccak.c b/comm/third_party/libgcrypt/cipher/keccak.c
new file mode 100644
index 0000000000..795a02e5b9
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/keccak.c
@@ -0,0 +1,1577 @@
+/* keccak.c - SHA3 hash functions
+ * Copyright (C) 2015 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <config.h>
+#include <string.h>
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+
+
+/* USE_64BIT indicates whether to use 64-bit generic implementation.
+ * USE_32BIT indicates whether to use 32-bit generic implementation. */
+#undef USE_64BIT
+#if defined(__x86_64__) || SIZEOF_UNSIGNED_LONG == 8
+# define USE_64BIT 1
+#else
+# define USE_32BIT 1
+#endif
+
+
+/* USE_64BIT_BMI2 indicates whether to compile with 64-bit Intel BMI2 code. */
+#undef USE_64BIT_BMI2
+#if defined(USE_64BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+ defined(HAVE_CPU_ARCH_X86)
+# define USE_64BIT_BMI2 1
+#endif
+
+
+/* USE_64BIT_SHLD indicates whether to compile with 64-bit Intel SHLD code. */
+#undef USE_64BIT_SHLD
+#if defined(USE_64BIT) && defined (__GNUC__) && defined(__x86_64__) && \
+ defined(HAVE_CPU_ARCH_X86)
+# define USE_64BIT_SHLD 1
+#endif
+
+
+/* USE_32BIT_BMI2 indicates whether to compile with 32-bit Intel BMI2 code. */
+#undef USE_32BIT_BMI2
+#if defined(USE_32BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+ defined(HAVE_CPU_ARCH_X86)
+# define USE_32BIT_BMI2 1
+#endif
+
+
+/* USE_64BIT_ARM_NEON indicates whether to enable 64-bit ARM/NEON assembly
+ * code. */
+#undef USE_64BIT_ARM_NEON
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_NEON)
+# define USE_64BIT_ARM_NEON 1
+# endif
+#endif /*ENABLE_NEON_SUPPORT*/
+
+
+/* USE_S390X_CRYPTO indicates whether to enable zSeries code. */
+#undef USE_S390X_CRYPTO
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define USE_S390X_CRYPTO 1
+#endif /* USE_S390X_CRYPTO */
+
+
+#if defined(USE_64BIT) || defined(USE_64BIT_ARM_NEON)
+# define NEED_COMMON64 1
+#endif
+
+#ifdef USE_32BIT
+# define NEED_COMMON32BI 1
+#endif
+
+
+#define SHA3_DELIMITED_SUFFIX 0x06
+#define SHAKE_DELIMITED_SUFFIX 0x1F
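+
+/* The delimited suffix encodes the domain separation bits followed by the
+ * first '1' bit of the pad10*1 padding, LSB first: SHA-3 appends the bits
+ * "01" (0x06) and SHAKE appends "1111" (0x1F).  */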
+
+
+typedef struct
+{
+ union {
+#ifdef NEED_COMMON64
+ u64 state64[25];
+#endif
+#ifdef NEED_COMMON32BI
+ u32 state32bi[50];
+#endif
+ } u;
+} KECCAK_STATE;
+
+
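+/* Backend dispatch table: keccak_init() picks one of the implementations
+ * below based on the detected hardware features.  */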
+typedef struct
+{
+ unsigned int (*permute)(KECCAK_STATE *hd);
+ unsigned int (*absorb)(KECCAK_STATE *hd, int pos, const byte *lanes,
+ unsigned int nlanes, int blocklanes);
+ unsigned int (*extract) (KECCAK_STATE *hd, unsigned int pos, byte *outbuf,
+ unsigned int outlen);
+} keccak_ops_t;
+
+
+typedef struct KECCAK_CONTEXT_S
+{
+ KECCAK_STATE state;
+ unsigned int outlen;
+ unsigned int blocksize;
+ unsigned int count;
+ unsigned int suffix;
+ const keccak_ops_t *ops;
+#ifdef USE_S390X_CRYPTO
+ unsigned int kimd_func;
+ unsigned int buf_pos;
+ byte buf[1344 / 8]; /* SHAKE128 requires biggest buffer, 1344 bits. */
+#endif
+} KECCAK_CONTEXT;
+
+
+
+#ifdef NEED_COMMON64
+
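+/* 24 round constants plus a terminator entry; the ARMv7/NEON assembly uses
+ * the 0xFFFF... terminator to detect the final round.  */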
+const u64 _gcry_keccak_round_consts_64bit[24 + 1] =
+{
+ U64_C(0x0000000000000001), U64_C(0x0000000000008082),
+ U64_C(0x800000000000808A), U64_C(0x8000000080008000),
+ U64_C(0x000000000000808B), U64_C(0x0000000080000001),
+ U64_C(0x8000000080008081), U64_C(0x8000000000008009),
+ U64_C(0x000000000000008A), U64_C(0x0000000000000088),
+ U64_C(0x0000000080008009), U64_C(0x000000008000000A),
+ U64_C(0x000000008000808B), U64_C(0x800000000000008B),
+ U64_C(0x8000000000008089), U64_C(0x8000000000008003),
+ U64_C(0x8000000000008002), U64_C(0x8000000000000080),
+ U64_C(0x000000000000800A), U64_C(0x800000008000000A),
+ U64_C(0x8000000080008081), U64_C(0x8000000000008080),
+ U64_C(0x0000000080000001), U64_C(0x8000000080008008),
+ U64_C(0xFFFFFFFFFFFFFFFF)
+};
+
+static unsigned int
+keccak_extract64(KECCAK_STATE *hd, unsigned int pos, byte *outbuf,
+ unsigned int outlen)
+{
+ unsigned int i;
+
+ /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */
+
+ for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++)
+ {
+ u64 tmp = hd->u.state64[i];
+ buf_put_le64(outbuf, tmp);
+ outbuf += 8;
+ }
+
+ return 0;
+}
+
+#endif /* NEED_COMMON64 */
+
+
+#ifdef NEED_COMMON32BI
+
+static const u32 round_consts_32bit[2 * 24] =
+{
+ 0x00000001UL, 0x00000000UL, 0x00000000UL, 0x00000089UL,
+ 0x00000000UL, 0x8000008bUL, 0x00000000UL, 0x80008080UL,
+ 0x00000001UL, 0x0000008bUL, 0x00000001UL, 0x00008000UL,
+ 0x00000001UL, 0x80008088UL, 0x00000001UL, 0x80000082UL,
+ 0x00000000UL, 0x0000000bUL, 0x00000000UL, 0x0000000aUL,
+ 0x00000001UL, 0x00008082UL, 0x00000000UL, 0x00008003UL,
+ 0x00000001UL, 0x0000808bUL, 0x00000001UL, 0x8000000bUL,
+ 0x00000001UL, 0x8000008aUL, 0x00000001UL, 0x80000081UL,
+ 0x00000000UL, 0x80000081UL, 0x00000000UL, 0x80000008UL,
+ 0x00000000UL, 0x00000083UL, 0x00000000UL, 0x80008003UL,
+ 0x00000001UL, 0x80008088UL, 0x00000000UL, 0x80000088UL,
+ 0x00000001UL, 0x00008000UL, 0x00000000UL, 0x80008082UL
+};
+
+static unsigned int
+keccak_extract32bi(KECCAK_STATE *hd, unsigned int pos, byte *outbuf,
+ unsigned int outlen)
+{
+ unsigned int i;
+ u32 x0;
+ u32 x1;
+ u32 t;
+
+ /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */
+
+ for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++)
+ {
+ x0 = hd->u.state32bi[i * 2 + 0];
+ x1 = hd->u.state32bi[i * 2 + 1];
+
+ t = (x0 & 0x0000FFFFUL) + (x1 << 16);
+ x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL);
+ x0 = t;
+ t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8);
+ t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4);
+ t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2);
+ t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1);
+ t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8);
+ t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4);
+ t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2);
+ t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1);
+
+ buf_put_le32(&outbuf[0], x0);
+ buf_put_le32(&outbuf[4], x1);
+ outbuf += 8;
+ }
+
+ return 0;
+}
+
+static inline void
+keccak_absorb_lane32bi(u32 *lane, u32 x0, u32 x1)
+{
+ u32 t;
+
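+  /* Gather the even-numbered bits of each 32-bit half into its low 16 bits
+   * and the odd-numbered bits into its high 16 bits, then XOR the even bits
+   * into lane[0] and the odd bits into lane[1] (bit-interleaved storage).  */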
+ t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1);
+ t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2);
+ t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4);
+ t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8);
+ t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1);
+ t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2);
+ t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4);
+ t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8);
+ lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16);
+ lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL);
+}
+
+#endif /* NEED_COMMON32BI */
+
+
+/* Construct generic 64-bit implementation. */
+#ifdef USE_64BIT
+
+#if __GNUC__ >= 4 && defined(__x86_64__)
+
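+/* On x86-64, XOR input lanes into the state with unaligned SSE2 loads and
+ * stores, two 64-bit lanes per XMM register.  */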
+static inline void absorb_lanes64_8(u64 *dst, const byte *in)
+{
+ asm ("movdqu 0*16(%[dst]), %%xmm0\n\t"
+ "movdqu 0*16(%[in]), %%xmm4\n\t"
+ "movdqu 1*16(%[dst]), %%xmm1\n\t"
+ "movdqu 1*16(%[in]), %%xmm5\n\t"
+ "movdqu 2*16(%[dst]), %%xmm2\n\t"
+ "movdqu 3*16(%[dst]), %%xmm3\n\t"
+ "pxor %%xmm4, %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+ "movdqu 2*16(%[in]), %%xmm4\n\t"
+ "movdqu 3*16(%[in]), %%xmm5\n\t"
+ "movdqu %%xmm0, 0*16(%[dst])\n\t"
+ "pxor %%xmm4, %%xmm2\n\t"
+ "movdqu %%xmm1, 1*16(%[dst])\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "movdqu %%xmm2, 2*16(%[dst])\n\t"
+ "movdqu %%xmm3, 3*16(%[dst])\n\t"
+ :
+ : [dst] "r" (dst), [in] "r" (in)
+ : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory");
+}
+
+static inline void absorb_lanes64_4(u64 *dst, const byte *in)
+{
+ asm ("movdqu 0*16(%[dst]), %%xmm0\n\t"
+ "movdqu 0*16(%[in]), %%xmm4\n\t"
+ "movdqu 1*16(%[dst]), %%xmm1\n\t"
+ "movdqu 1*16(%[in]), %%xmm5\n\t"
+ "pxor %%xmm4, %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+ "movdqu %%xmm0, 0*16(%[dst])\n\t"
+ "movdqu %%xmm1, 1*16(%[dst])\n\t"
+ :
+ : [dst] "r" (dst), [in] "r" (in)
+ : "xmm0", "xmm1", "xmm4", "xmm5", "memory");
+}
+
+static inline void absorb_lanes64_2(u64 *dst, const byte *in)
+{
+ asm ("movdqu 0*16(%[dst]), %%xmm0\n\t"
+ "movdqu 0*16(%[in]), %%xmm4\n\t"
+ "pxor %%xmm4, %%xmm0\n\t"
+ "movdqu %%xmm0, 0*16(%[dst])\n\t"
+ :
+ : [dst] "r" (dst), [in] "r" (in)
+ : "xmm0", "xmm4", "memory");
+}
+
+#else /* __x86_64__ */
+
+static inline void absorb_lanes64_8(u64 *dst, const byte *in)
+{
+ dst[0] ^= buf_get_le64(in + 8 * 0);
+ dst[1] ^= buf_get_le64(in + 8 * 1);
+ dst[2] ^= buf_get_le64(in + 8 * 2);
+ dst[3] ^= buf_get_le64(in + 8 * 3);
+ dst[4] ^= buf_get_le64(in + 8 * 4);
+ dst[5] ^= buf_get_le64(in + 8 * 5);
+ dst[6] ^= buf_get_le64(in + 8 * 6);
+ dst[7] ^= buf_get_le64(in + 8 * 7);
+}
+
+static inline void absorb_lanes64_4(u64 *dst, const byte *in)
+{
+ dst[0] ^= buf_get_le64(in + 8 * 0);
+ dst[1] ^= buf_get_le64(in + 8 * 1);
+ dst[2] ^= buf_get_le64(in + 8 * 2);
+ dst[3] ^= buf_get_le64(in + 8 * 3);
+}
+
+static inline void absorb_lanes64_2(u64 *dst, const byte *in)
+{
+ dst[0] ^= buf_get_le64(in + 8 * 0);
+ dst[1] ^= buf_get_le64(in + 8 * 1);
+}
+
+#endif /* !__x86_64__ */
+
+static inline void absorb_lanes64_1(u64 *dst, const byte *in)
+{
+ dst[0] ^= buf_get_le64(in + 8 * 0);
+}
+
+
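+/* Portable helpers: ANDN64(x, y) computes ~x & y, and ROL64 rotates left
+ * with the shift counts masked so that a rotation by 0 is well defined.  */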
+# define ANDN64(x, y) (~(x) & (y))
+# define ROL64(x, n) (((x) << ((unsigned int)n & 63)) | \
+ ((x) >> ((64 - (unsigned int)(n)) & 63)))
+
+# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64
+# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64
+# include "keccak_permute_64.h"
+
+# undef ANDN64
+# undef ROL64
+# undef KECCAK_F1600_PERMUTE_FUNC_NAME
+# undef KECCAK_F1600_ABSORB_FUNC_NAME
+
+static const keccak_ops_t keccak_generic64_ops =
+{
+ .permute = keccak_f1600_state_permute64,
+ .absorb = keccak_absorb_lanes64,
+ .extract = keccak_extract64,
+};
+
+#endif /* USE_64BIT */
+
+
+/* Construct 64-bit Intel SHLD implementation. */
+#ifdef USE_64BIT_SHLD
+
+# define ANDN64(x, y) (~(x) & (y))
+# define ROL64(x, n) ({ \
+ u64 tmp = (x); \
+ asm ("shldq %1, %0, %0" \
+ : "+r" (tmp) \
+ : "J" ((n) & 63) \
+ : "cc"); \
+ tmp; })
+
+# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_shld
+# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_shld
+# include "keccak_permute_64.h"
+
+# undef ANDN64
+# undef ROL64
+# undef KECCAK_F1600_PERMUTE_FUNC_NAME
+# undef KECCAK_F1600_ABSORB_FUNC_NAME
+
+static const keccak_ops_t keccak_shld_64_ops =
+{
+ .permute = keccak_f1600_state_permute64_shld,
+ .absorb = keccak_absorb_lanes64_shld,
+ .extract = keccak_extract64,
+};
+
+#endif /* USE_64BIT_SHLD */
+
+
+/* Construct 64-bit Intel BMI2 implementation. */
+#ifdef USE_64BIT_BMI2
+
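+/* BMI2 variants: ANDN64 maps to the 'andn' instruction and ROL64 uses
+ * 'rorx' with a rotate-right count of 64 - n.  */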
+# define ANDN64(x, y) ({ \
+ u64 tmp; \
+ asm ("andnq %2, %1, %0" \
+ : "=r" (tmp) \
+ : "r0" (x), "rm" (y)); \
+ tmp; })
+
+# define ROL64(x, n) ({ \
+ u64 tmp; \
+ asm ("rorxq %2, %1, %0" \
+ : "=r" (tmp) \
+ : "rm0" (x), "J" (64 - ((n) & 63))); \
+ tmp; })
+
+# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_bmi2
+# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_bmi2
+# include "keccak_permute_64.h"
+
+# undef ANDN64
+# undef ROL64
+# undef KECCAK_F1600_PERMUTE_FUNC_NAME
+# undef KECCAK_F1600_ABSORB_FUNC_NAME
+
+static const keccak_ops_t keccak_bmi2_64_ops =
+{
+ .permute = keccak_f1600_state_permute64_bmi2,
+ .absorb = keccak_absorb_lanes64_bmi2,
+ .extract = keccak_extract64,
+};
+
+#endif /* USE_64BIT_BMI2 */
+
+
+/* 64-bit ARMv7/NEON implementation. */
+#ifdef USE_64BIT_ARM_NEON
+
+unsigned int _gcry_keccak_permute_armv7_neon(u64 *state);
+unsigned int _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, int pos,
+ const byte *lanes,
+ unsigned int nlanes,
+ int blocklanes);
+
+static unsigned int keccak_permute64_armv7_neon(KECCAK_STATE *hd)
+{
+ return _gcry_keccak_permute_armv7_neon(hd->u.state64);
+}
+
+static unsigned int
+keccak_absorb_lanes64_armv7_neon(KECCAK_STATE *hd, int pos, const byte *lanes,
+ unsigned int nlanes, int blocklanes)
+{
+ if (blocklanes < 0)
+ {
+ /* blocklanes == -1, permutationless absorb from keccak_final. */
+
+ while (nlanes)
+ {
+ hd->u.state64[pos] ^= buf_get_le64(lanes);
+ lanes += 8;
+ nlanes--;
+ }
+
+ return 0;
+ }
+ else
+ {
+ return _gcry_keccak_absorb_lanes64_armv7_neon(hd->u.state64, pos, lanes,
+ nlanes, blocklanes);
+ }
+}
+
+static const keccak_ops_t keccak_armv7_neon_64_ops =
+{
+ .permute = keccak_permute64_armv7_neon,
+ .absorb = keccak_absorb_lanes64_armv7_neon,
+ .extract = keccak_extract64,
+};
+
+#endif /* USE_64BIT_ARM_NEON */
+
+
+/* Construct generic 32-bit implementation. */
+#ifdef USE_32BIT
+
+# define ANDN32(x, y) (~(x) & (y))
+# define ROL32(x, n) (((x) << ((unsigned int)n & 31)) | \
+ ((x) >> ((32 - (unsigned int)(n)) & 31)))
+
+# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi
+# include "keccak_permute_32.h"
+
+# undef ANDN32
+# undef ROL32
+# undef KECCAK_F1600_PERMUTE_FUNC_NAME
+
+static unsigned int
+keccak_absorb_lanes32bi(KECCAK_STATE *hd, int pos, const byte *lanes,
+ unsigned int nlanes, int blocklanes)
+{
+ unsigned int burn = 0;
+
+ while (nlanes)
+ {
+ keccak_absorb_lane32bi(&hd->u.state32bi[pos * 2],
+ buf_get_le32(lanes + 0),
+ buf_get_le32(lanes + 4));
+ lanes += 8;
+ nlanes--;
+
+ if (++pos == blocklanes)
+ {
+ burn = keccak_f1600_state_permute32bi(hd);
+ pos = 0;
+ }
+ }
+
+ return burn;
+}
+
+static const keccak_ops_t keccak_generic32bi_ops =
+{
+ .permute = keccak_f1600_state_permute32bi,
+ .absorb = keccak_absorb_lanes32bi,
+ .extract = keccak_extract32bi,
+};
+
+#endif /* USE_32BIT */
+
+
+/* Construct 32-bit Intel BMI2 implementation. */
+#ifdef USE_32BIT_BMI2
+
+# define ANDN32(x, y) ({ \
+ u32 tmp; \
+ asm ("andnl %2, %1, %0" \
+ : "=r" (tmp) \
+ : "r0" (x), "rm" (y)); \
+ tmp; })
+
+# define ROL32(x, n) ({ \
+ u32 tmp; \
+ asm ("rorxl %2, %1, %0" \
+ : "=r" (tmp) \
+ : "rm0" (x), "J" (32 - ((n) & 31))); \
+ tmp; })
+
+# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi_bmi2
+# include "keccak_permute_32.h"
+
+# undef ANDN32
+# undef ROL32
+# undef KECCAK_F1600_PERMUTE_FUNC_NAME
+
+static inline u32 pext(u32 x, u32 mask)
+{
+ u32 tmp;
+ asm ("pextl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask));
+ return tmp;
+}
+
+static inline u32 pdep(u32 x, u32 mask)
+{
+ u32 tmp;
+ asm ("pdepl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask));
+ return tmp;
+}
+
+static inline void
+keccak_absorb_lane32bi_bmi2(u32 *lane, u32 x0, u32 x1)
+{
+ x0 = pdep(pext(x0, 0x55555555), 0x0000ffff) | (pext(x0, 0xaaaaaaaa) << 16);
+ x1 = pdep(pext(x1, 0x55555555), 0x0000ffff) | (pext(x1, 0xaaaaaaaa) << 16);
+
+ lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16);
+ lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL);
+}
+
+static unsigned int
+keccak_absorb_lanes32bi_bmi2(KECCAK_STATE *hd, int pos, const byte *lanes,
+ unsigned int nlanes, int blocklanes)
+{
+ unsigned int burn = 0;
+
+ while (nlanes)
+ {
+ keccak_absorb_lane32bi_bmi2(&hd->u.state32bi[pos * 2],
+ buf_get_le32(lanes + 0),
+ buf_get_le32(lanes + 4));
+ lanes += 8;
+ nlanes--;
+
+ if (++pos == blocklanes)
+ {
+ burn = keccak_f1600_state_permute32bi_bmi2(hd);
+ pos = 0;
+ }
+ }
+
+ return burn;
+}
+
+static unsigned int
+keccak_extract32bi_bmi2(KECCAK_STATE *hd, unsigned int pos, byte *outbuf,
+ unsigned int outlen)
+{
+ unsigned int i;
+ u32 x0;
+ u32 x1;
+ u32 t;
+
+ /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */
+
+ for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++)
+ {
+ x0 = hd->u.state32bi[i * 2 + 0];
+ x1 = hd->u.state32bi[i * 2 + 1];
+
+ t = (x0 & 0x0000FFFFUL) + (x1 << 16);
+ x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL);
+ x0 = t;
+
+ x0 = pdep(pext(x0, 0xffff0001), 0xaaaaaaab) | pdep(x0 >> 1, 0x55555554);
+ x1 = pdep(pext(x1, 0xffff0001), 0xaaaaaaab) | pdep(x1 >> 1, 0x55555554);
+
+ buf_put_le32(&outbuf[0], x0);
+ buf_put_le32(&outbuf[4], x1);
+ outbuf += 8;
+ }
+
+ return 0;
+}
+
+static const keccak_ops_t keccak_bmi2_32bi_ops =
+{
+ .permute = keccak_f1600_state_permute32bi_bmi2,
+ .absorb = keccak_absorb_lanes32bi_bmi2,
+ .extract = keccak_extract32bi_bmi2,
+};
+
+#endif /* USE_32BIT_BMI2 */
+
+
+#ifdef USE_S390X_CRYPTO
+#include "asm-inline-s390x.h"
+
+static inline void
+keccak_bwrite_s390x (void *context, const byte *in, size_t inlen)
+{
+ KECCAK_CONTEXT *ctx = context;
+
+ /* Write full-blocks. */
+ kimd_execute (ctx->kimd_func, &ctx->state, in, inlen);
+ return;
+}
+
+static inline void
+keccak_final_s390x (void *context)
+{
+ KECCAK_CONTEXT *ctx = context;
+
+ if (ctx->suffix == SHA3_DELIMITED_SUFFIX)
+ {
+ klmd_execute (ctx->kimd_func, &ctx->state, ctx->buf, ctx->count);
+ }
+ else
+ {
+ klmd_shake_execute (ctx->kimd_func, &ctx->state, NULL, 0, ctx->buf,
+ ctx->count);
+ ctx->count = 0;
+ ctx->buf_pos = 0;
+ }
+
+ return;
+}
+
+static inline void
+keccak_bextract_s390x (void *context, byte *out, size_t outlen)
+{
+ KECCAK_CONTEXT *ctx = context;
+
+ /* Extract full-blocks. */
+ klmd_shake_execute (ctx->kimd_func | KLMD_PADDING_STATE, &ctx->state,
+ out, outlen, NULL, 0);
+ return;
+}
+
+static void
+keccak_write_s390x (void *context, const byte *inbuf, size_t inlen)
+{
+ KECCAK_CONTEXT *hd = context;
+ const size_t blocksize = hd->blocksize;
+ size_t inblocks;
+ size_t copylen;
+
+ while (hd->count)
+ {
+ if (hd->count == blocksize) /* Flush the buffer. */
+ {
+ keccak_bwrite_s390x (hd, hd->buf, blocksize);
+ hd->count = 0;
+ }
+ else
+ {
+ copylen = inlen;
+ if (copylen > blocksize - hd->count)
+ copylen = blocksize - hd->count;
+
+ if (copylen == 0)
+ break;
+
+ buf_cpy (&hd->buf[hd->count], inbuf, copylen);
+ hd->count += copylen;
+ inbuf += copylen;
+ inlen -= copylen;
+ }
+ }
+
+ if (inlen == 0)
+ return;
+
+ if (inlen >= blocksize)
+ {
+ inblocks = inlen / blocksize;
+ keccak_bwrite_s390x (hd, inbuf, inblocks * blocksize);
+ hd->count = 0;
+ inlen -= inblocks * blocksize;
+ inbuf += inblocks * blocksize;
+ }
+
+ if (inlen)
+ {
+ buf_cpy (hd->buf, inbuf, inlen);
+ hd->count = inlen;
+ }
+}
+
+static void
+keccak_extract_s390x (void *context, void *outbuf_arg, size_t outlen)
+{
+ KECCAK_CONTEXT *hd = context;
+ const size_t blocksize = hd->blocksize;
+ byte *outbuf = outbuf_arg;
+
+ while (outlen)
+ {
+ gcry_assert(hd->count == 0 || hd->buf_pos < hd->count);
+
+ if (hd->buf_pos < hd->count && outlen)
+ {
+ size_t copylen = hd->count - hd->buf_pos;
+
+ if (copylen > outlen)
+ copylen = outlen;
+
+ buf_cpy (outbuf, &hd->buf[hd->buf_pos], copylen);
+
+ outbuf += copylen;
+ outlen -= copylen;
+ hd->buf_pos += copylen;
+ }
+
+ if (hd->buf_pos == hd->count)
+ {
+ hd->buf_pos = 0;
+ hd->count = 0;
+ }
+
+ if (outlen == 0)
+ return;
+
+ if (outlen >= blocksize)
+ {
+ size_t outblocks = outlen / blocksize;
+
+ keccak_bextract_s390x (context, outbuf, outblocks * blocksize);
+
+ outlen -= outblocks * blocksize;
+ outbuf += outblocks * blocksize;
+
+ if (outlen == 0)
+ return;
+ }
+
+ keccak_bextract_s390x (context, hd->buf, blocksize);
+ hd->count = blocksize;
+ }
+}
+#endif /* USE_S390X_CRYPTO */
+
+
+static void
+keccak_write (void *context, const void *inbuf_arg, size_t inlen)
+{
+ KECCAK_CONTEXT *ctx = context;
+ const size_t bsize = ctx->blocksize;
+ const size_t blocklanes = bsize / 8;
+ const byte *inbuf = inbuf_arg;
+ unsigned int nburn, burn = 0;
+ unsigned int count, i;
+ unsigned int pos, nlanes;
+
+#ifdef USE_S390X_CRYPTO
+ if (ctx->kimd_func)
+ {
+ keccak_write_s390x (context, inbuf, inlen);
+ return;
+ }
+#endif
+
+ count = ctx->count;
+
+ if (inlen && (count % 8))
+ {
+ byte lane[8] = { 0, };
+
+ /* Complete absorbing partial input lane. */
+
+ pos = count / 8;
+
+ for (i = count % 8; inlen && i < 8; i++)
+ {
+ lane[i] = *inbuf++;
+ inlen--;
+ count++;
+ }
+
+ if (count == bsize)
+ count = 0;
+
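+      /* Passing -1 as blocklanes keeps absorb from running the permutation
+       * while this lane is still only partially filled.  */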
+ nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1,
+ (count % 8) ? -1 : blocklanes);
+ burn = nburn > burn ? nburn : burn;
+ }
+
+ /* Absorb full input lanes. */
+
+ pos = count / 8;
+ nlanes = inlen / 8;
+ if (nlanes > 0)
+ {
+ nburn = ctx->ops->absorb(&ctx->state, pos, inbuf, nlanes, blocklanes);
+ burn = nburn > burn ? nburn : burn;
+ inlen -= nlanes * 8;
+ inbuf += nlanes * 8;
+ count += nlanes * 8;
+ count = count % bsize;
+ }
+
+ if (inlen)
+ {
+ byte lane[8] = { 0, };
+
+ /* Absorb remaining partial input lane. */
+
+ pos = count / 8;
+
+ for (i = count % 8; inlen && i < 8; i++)
+ {
+ lane[i] = *inbuf++;
+ inlen--;
+ count++;
+ }
+
+ nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1, -1);
+ burn = nburn > burn ? nburn : burn;
+
+ gcry_assert(count < bsize);
+ }
+
+ ctx->count = count;
+
+ if (burn)
+ _gcry_burn_stack (burn);
+}
+
+
+static void
+keccak_init (int algo, void *context, unsigned int flags)
+{
+ KECCAK_CONTEXT *ctx = context;
+ KECCAK_STATE *hd = &ctx->state;
+ unsigned int features = _gcry_get_hw_features ();
+
+ (void)flags;
+ (void)features;
+
+ memset (hd, 0, sizeof *hd);
+
+ ctx->count = 0;
+
+ /* Select generic implementation. */
+#ifdef USE_64BIT
+ ctx->ops = &keccak_generic64_ops;
+#elif defined USE_32BIT
+ ctx->ops = &keccak_generic32bi_ops;
+#endif
+
+  /* Select optimized implementation based on hw features. */
+ if (0) {}
+#ifdef USE_64BIT_ARM_NEON
+ else if (features & HWF_ARM_NEON)
+ ctx->ops = &keccak_armv7_neon_64_ops;
+#endif
+#ifdef USE_64BIT_BMI2
+ else if (features & HWF_INTEL_BMI2)
+ ctx->ops = &keccak_bmi2_64_ops;
+#endif
+#ifdef USE_32BIT_BMI2
+ else if (features & HWF_INTEL_BMI2)
+ ctx->ops = &keccak_bmi2_32bi_ops;
+#endif
+#ifdef USE_64BIT_SHLD
+ else if (features & HWF_INTEL_FAST_SHLD)
+ ctx->ops = &keccak_shld_64_ops;
+#endif
+
+  /* Set the input block size; in Keccak terms this is called the 'rate'. */
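+  /* For the SHA-3 variants the rate is 1600 minus twice the digest size in
+   * bits; SHAKE128 and SHAKE256 use rates of 1344 and 1088 bits.  */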
+
+ switch (algo)
+ {
+ case GCRY_MD_SHA3_224:
+ ctx->suffix = SHA3_DELIMITED_SUFFIX;
+ ctx->blocksize = 1152 / 8;
+ ctx->outlen = 224 / 8;
+ break;
+ case GCRY_MD_SHA3_256:
+ ctx->suffix = SHA3_DELIMITED_SUFFIX;
+ ctx->blocksize = 1088 / 8;
+ ctx->outlen = 256 / 8;
+ break;
+ case GCRY_MD_SHA3_384:
+ ctx->suffix = SHA3_DELIMITED_SUFFIX;
+ ctx->blocksize = 832 / 8;
+ ctx->outlen = 384 / 8;
+ break;
+ case GCRY_MD_SHA3_512:
+ ctx->suffix = SHA3_DELIMITED_SUFFIX;
+ ctx->blocksize = 576 / 8;
+ ctx->outlen = 512 / 8;
+ break;
+ case GCRY_MD_SHAKE128:
+ ctx->suffix = SHAKE_DELIMITED_SUFFIX;
+ ctx->blocksize = 1344 / 8;
+ ctx->outlen = 0;
+ break;
+ case GCRY_MD_SHAKE256:
+ ctx->suffix = SHAKE_DELIMITED_SUFFIX;
+ ctx->blocksize = 1088 / 8;
+ ctx->outlen = 0;
+ break;
+ default:
+ BUG();
+ }
+
+#ifdef USE_S390X_CRYPTO
+ ctx->kimd_func = 0;
+ if ((features & HWF_S390X_MSA) != 0)
+ {
+ unsigned int kimd_func = 0;
+
+ switch (algo)
+ {
+ case GCRY_MD_SHA3_224:
+ kimd_func = KMID_FUNCTION_SHA3_224;
+ break;
+ case GCRY_MD_SHA3_256:
+ kimd_func = KMID_FUNCTION_SHA3_256;
+ break;
+ case GCRY_MD_SHA3_384:
+ kimd_func = KMID_FUNCTION_SHA3_384;
+ break;
+ case GCRY_MD_SHA3_512:
+ kimd_func = KMID_FUNCTION_SHA3_512;
+ break;
+ case GCRY_MD_SHAKE128:
+ kimd_func = KMID_FUNCTION_SHAKE128;
+ break;
+ case GCRY_MD_SHAKE256:
+ kimd_func = KMID_FUNCTION_SHAKE256;
+ break;
+ }
+
+ if ((kimd_query () & km_function_to_mask (kimd_func)) &&
+ (klmd_query () & km_function_to_mask (kimd_func)))
+ {
+ ctx->kimd_func = kimd_func;
+ }
+ }
+#endif
+}
+
+static void
+sha3_224_init (void *context, unsigned int flags)
+{
+ keccak_init (GCRY_MD_SHA3_224, context, flags);
+}
+
+static void
+sha3_256_init (void *context, unsigned int flags)
+{
+ keccak_init (GCRY_MD_SHA3_256, context, flags);
+}
+
+static void
+sha3_384_init (void *context, unsigned int flags)
+{
+ keccak_init (GCRY_MD_SHA3_384, context, flags);
+}
+
+static void
+sha3_512_init (void *context, unsigned int flags)
+{
+ keccak_init (GCRY_MD_SHA3_512, context, flags);
+}
+
+static void
+shake128_init (void *context, unsigned int flags)
+{
+ keccak_init (GCRY_MD_SHAKE128, context, flags);
+}
+
+static void
+shake256_init (void *context, unsigned int flags)
+{
+ keccak_init (GCRY_MD_SHAKE256, context, flags);
+}
+
+/* The routine final terminates the computation and
+ * returns the digest.
+ * The handle is prepared for a new cycle, but adding bytes to the
+ * handle will destroy the returned buffer.
+ * For the SHA-3 variants the digest ('outlen' bytes) can then be read
+ * with keccak_read(); for the SHAKE variants the output is squeezed out
+ * afterwards with keccak_extract().
+ */
+static void
+keccak_final (void *context)
+{
+ KECCAK_CONTEXT *ctx = context;
+ KECCAK_STATE *hd = &ctx->state;
+ const size_t bsize = ctx->blocksize;
+ const byte suffix = ctx->suffix;
+ unsigned int nburn, burn = 0;
+ unsigned int lastbytes;
+ byte lane[8];
+
+#ifdef USE_S390X_CRYPTO
+ if (ctx->kimd_func)
+ {
+ keccak_final_s390x (context);
+ return;
+ }
+#endif
+
+ lastbytes = ctx->count;
+
+ /* Do the padding and switch to the squeezing phase */
+
+ /* Absorb the last few bits and add the first bit of padding (which
+ coincides with the delimiter in delimited suffix) */
+ buf_put_le64(lane, (u64)suffix << ((lastbytes % 8) * 8));
+ nburn = ctx->ops->absorb(&ctx->state, lastbytes / 8, lane, 1, -1);
+ burn = nburn > burn ? nburn : burn;
+
+ /* Add the second bit of padding. */
+ buf_put_le64(lane, (u64)0x80 << (((bsize - 1) % 8) * 8));
+ nburn = ctx->ops->absorb(&ctx->state, (bsize - 1) / 8, lane, 1, -1);
+ burn = nburn > burn ? nburn : burn;
+
+ if (suffix == SHA3_DELIMITED_SUFFIX)
+ {
+ /* Switch to the squeezing phase. */
+ nburn = ctx->ops->permute(hd);
+ burn = nburn > burn ? nburn : burn;
+
+ /* Squeeze out the SHA3 digest. */
+ nburn = ctx->ops->extract(hd, 0, (void *)hd, ctx->outlen);
+ burn = nburn > burn ? nburn : burn;
+ }
+ else
+ {
+ /* Output for SHAKE can now be read with md_extract(). */
+
+ ctx->count = 0;
+ }
+
+ wipememory(lane, sizeof(lane));
+ if (burn)
+ _gcry_burn_stack (burn);
+}
+
+
+static byte *
+keccak_read (void *context)
+{
+ KECCAK_CONTEXT *ctx = (KECCAK_CONTEXT *) context;
+ KECCAK_STATE *hd = &ctx->state;
+ return (byte *)&hd->u;
+}
+
+
+static void
+keccak_extract (void *context, void *out, size_t outlen)
+{
+ KECCAK_CONTEXT *ctx = context;
+ KECCAK_STATE *hd = &ctx->state;
+ const size_t bsize = ctx->blocksize;
+ unsigned int nburn, burn = 0;
+ byte *outbuf = out;
+ unsigned int nlanes;
+ unsigned int nleft;
+ unsigned int count;
+ unsigned int i;
+ byte lane[8];
+
+#ifdef USE_S390X_CRYPTO
+ if (ctx->kimd_func)
+ {
+ keccak_extract_s390x (context, out, outlen);
+ return;
+ }
+#endif
+
+ count = ctx->count;
+
+ while (count && outlen && (outlen < 8 || count % 8))
+ {
+ /* Extract partial lane. */
+ nburn = ctx->ops->extract(hd, count / 8, lane, 8);
+ burn = nburn > burn ? nburn : burn;
+
+ for (i = count % 8; outlen && i < 8; i++)
+ {
+ *outbuf++ = lane[i];
+ outlen--;
+ count++;
+ }
+
+ gcry_assert(count <= bsize);
+
+ if (count == bsize)
+ count = 0;
+ }
+
+ if (outlen >= 8 && count)
+ {
+ /* Extract tail of partial block. */
+ nlanes = outlen / 8;
+ nleft = (bsize - count) / 8;
+ nlanes = nlanes < nleft ? nlanes : nleft;
+
+ nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8);
+ burn = nburn > burn ? nburn : burn;
+ outlen -= nlanes * 8;
+ outbuf += nlanes * 8;
+ count += nlanes * 8;
+
+ gcry_assert(count <= bsize);
+
+ if (count == bsize)
+ count = 0;
+ }
+
+ while (outlen >= bsize)
+ {
+ gcry_assert(count == 0);
+
+ /* Squeeze more. */
+ nburn = ctx->ops->permute(hd);
+ burn = nburn > burn ? nburn : burn;
+
+ /* Extract full block. */
+ nburn = ctx->ops->extract(hd, 0, outbuf, bsize);
+ burn = nburn > burn ? nburn : burn;
+
+ outlen -= bsize;
+ outbuf += bsize;
+ }
+
+ if (outlen)
+ {
+ gcry_assert(outlen < bsize);
+
+ if (count == 0)
+ {
+ /* Squeeze more. */
+ nburn = ctx->ops->permute(hd);
+ burn = nburn > burn ? nburn : burn;
+ }
+
+ if (outlen >= 8)
+ {
+ /* Extract head of partial block. */
+ nlanes = outlen / 8;
+ nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8);
+ burn = nburn > burn ? nburn : burn;
+ outlen -= nlanes * 8;
+ outbuf += nlanes * 8;
+ count += nlanes * 8;
+
+ gcry_assert(count < bsize);
+ }
+
+ if (outlen)
+ {
+ /* Extract head of partial lane. */
+ nburn = ctx->ops->extract(hd, count / 8, lane, 8);
+ burn = nburn > burn ? nburn : burn;
+
+ for (i = count % 8; outlen && i < 8; i++)
+ {
+ *outbuf++ = lane[i];
+ outlen--;
+ count++;
+ }
+
+ gcry_assert(count < bsize);
+ }
+ }
+
+ ctx->count = count;
+
+ if (burn)
+ _gcry_burn_stack (burn);
+}
+
+
+/* Shortcut function which puts the hash value of the supplied buffer
+ * into outbuf, which must have a size of 'spec->mdlen' bytes. */
+static void
+_gcry_sha3_hash_buffer (void *outbuf, const void *buffer, size_t length,
+ const gcry_md_spec_t *spec)
+{
+ KECCAK_CONTEXT hd;
+
+ spec->init (&hd, 0);
+ keccak_write (&hd, buffer, length);
+ keccak_final (&hd);
+ memcpy (outbuf, keccak_read (&hd), spec->mdlen);
+}
+
+
+/* Variant of the above shortcut function using multiple buffers. */
+static void
+_gcry_sha3_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt,
+ const gcry_md_spec_t *spec)
+{
+ KECCAK_CONTEXT hd;
+
+ spec->init (&hd, 0);
+ for (;iovcnt > 0; iov++, iovcnt--)
+ keccak_write (&hd, (const char*)iov[0].data + iov[0].off, iov[0].len);
+ keccak_final (&hd);
+ memcpy (outbuf, keccak_read (&hd), spec->mdlen);
+}
+
+
+static void
+_gcry_sha3_224_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+ _gcry_sha3_hash_buffer (outbuf, buffer, length, &_gcry_digest_spec_sha3_224);
+}
+
+static void
+_gcry_sha3_256_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+ _gcry_sha3_hash_buffer (outbuf, buffer, length, &_gcry_digest_spec_sha3_256);
+}
+
+static void
+_gcry_sha3_384_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+ _gcry_sha3_hash_buffer (outbuf, buffer, length, &_gcry_digest_spec_sha3_384);
+}
+
+static void
+_gcry_sha3_512_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+ _gcry_sha3_hash_buffer (outbuf, buffer, length, &_gcry_digest_spec_sha3_512);
+}
+
+static void
+_gcry_sha3_224_hash_buffers (void *outbuf, const gcry_buffer_t *iov,
+ int iovcnt)
+{
+ _gcry_sha3_hash_buffers (outbuf, iov, iovcnt, &_gcry_digest_spec_sha3_224);
+}
+
+static void
+_gcry_sha3_256_hash_buffers (void *outbuf, const gcry_buffer_t *iov,
+ int iovcnt)
+{
+ _gcry_sha3_hash_buffers (outbuf, iov, iovcnt, &_gcry_digest_spec_sha3_256);
+}
+
+static void
+_gcry_sha3_384_hash_buffers (void *outbuf, const gcry_buffer_t *iov,
+ int iovcnt)
+{
+ _gcry_sha3_hash_buffers (outbuf, iov, iovcnt, &_gcry_digest_spec_sha3_384);
+}
+
+static void
+_gcry_sha3_512_hash_buffers (void *outbuf, const gcry_buffer_t *iov,
+ int iovcnt)
+{
+ _gcry_sha3_hash_buffers (outbuf, iov, iovcnt, &_gcry_digest_spec_sha3_512);
+}
+
+
+/*
+ Self-test section.
+ */
+
+
+static gpg_err_code_t
+selftests_keccak (int algo, int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+ const char *short_hash;
+ const char *long_hash;
+ const char *one_million_a_hash;
+ int hash_len;
+
+ switch (algo)
+ {
+ default:
+ BUG();
+
+ case GCRY_MD_SHA3_224:
+ short_hash =
+ "\xe6\x42\x82\x4c\x3f\x8c\xf2\x4a\xd0\x92\x34\xee\x7d\x3c\x76\x6f"
+ "\xc9\xa3\xa5\x16\x8d\x0c\x94\xad\x73\xb4\x6f\xdf";
+ long_hash =
+ "\x54\x3e\x68\x68\xe1\x66\x6c\x1a\x64\x36\x30\xdf\x77\x36\x7a\xe5"
+ "\xa6\x2a\x85\x07\x0a\x51\xc1\x4c\xbf\x66\x5c\xbc";
+ one_million_a_hash =
+ "\xd6\x93\x35\xb9\x33\x25\x19\x2e\x51\x6a\x91\x2e\x6d\x19\xa1\x5c"
+ "\xb5\x1c\x6e\xd5\xc1\x52\x43\xe7\xa7\xfd\x65\x3c";
+ hash_len = 28;
+ break;
+
+ case GCRY_MD_SHA3_256:
+ short_hash =
+ "\x3a\x98\x5d\xa7\x4f\xe2\x25\xb2\x04\x5c\x17\x2d\x6b\xd3\x90\xbd"
+ "\x85\x5f\x08\x6e\x3e\x9d\x52\x5b\x46\xbf\xe2\x45\x11\x43\x15\x32";
+ long_hash =
+ "\x91\x6f\x60\x61\xfe\x87\x97\x41\xca\x64\x69\xb4\x39\x71\xdf\xdb"
+ "\x28\xb1\xa3\x2d\xc3\x6c\xb3\x25\x4e\x81\x2b\xe2\x7a\xad\x1d\x18";
+ one_million_a_hash =
+ "\x5c\x88\x75\xae\x47\x4a\x36\x34\xba\x4f\xd5\x5e\xc8\x5b\xff\xd6"
+ "\x61\xf3\x2a\xca\x75\xc6\xd6\x99\xd0\xcd\xcb\x6c\x11\x58\x91\xc1";
+ hash_len = 32;
+ break;
+
+ case GCRY_MD_SHA3_384:
+ short_hash =
+ "\xec\x01\x49\x82\x88\x51\x6f\xc9\x26\x45\x9f\x58\xe2\xc6\xad\x8d"
+ "\xf9\xb4\x73\xcb\x0f\xc0\x8c\x25\x96\xda\x7c\xf0\xe4\x9b\xe4\xb2"
+ "\x98\xd8\x8c\xea\x92\x7a\xc7\xf5\x39\xf1\xed\xf2\x28\x37\x6d\x25";
+ long_hash =
+ "\x79\x40\x7d\x3b\x59\x16\xb5\x9c\x3e\x30\xb0\x98\x22\x97\x47\x91"
+ "\xc3\x13\xfb\x9e\xcc\x84\x9e\x40\x6f\x23\x59\x2d\x04\xf6\x25\xdc"
+ "\x8c\x70\x9b\x98\xb4\x3b\x38\x52\xb3\x37\x21\x61\x79\xaa\x7f\xc7";
+ one_million_a_hash =
+ "\xee\xe9\xe2\x4d\x78\xc1\x85\x53\x37\x98\x34\x51\xdf\x97\xc8\xad"
+ "\x9e\xed\xf2\x56\xc6\x33\x4f\x8e\x94\x8d\x25\x2d\x5e\x0e\x76\x84"
+ "\x7a\xa0\x77\x4d\xdb\x90\xa8\x42\x19\x0d\x2c\x55\x8b\x4b\x83\x40";
+ hash_len = 48;
+ break;
+
+ case GCRY_MD_SHA3_512:
+ short_hash =
+ "\xb7\x51\x85\x0b\x1a\x57\x16\x8a\x56\x93\xcd\x92\x4b\x6b\x09\x6e"
+ "\x08\xf6\x21\x82\x74\x44\xf7\x0d\x88\x4f\x5d\x02\x40\xd2\x71\x2e"
+ "\x10\xe1\x16\xe9\x19\x2a\xf3\xc9\x1a\x7e\xc5\x76\x47\xe3\x93\x40"
+ "\x57\x34\x0b\x4c\xf4\x08\xd5\xa5\x65\x92\xf8\x27\x4e\xec\x53\xf0";
+ long_hash =
+ "\xaf\xeb\xb2\xef\x54\x2e\x65\x79\xc5\x0c\xad\x06\xd2\xe5\x78\xf9"
+ "\xf8\xdd\x68\x81\xd7\xdc\x82\x4d\x26\x36\x0f\xee\xbf\x18\xa4\xfa"
+ "\x73\xe3\x26\x11\x22\x94\x8e\xfc\xfd\x49\x2e\x74\xe8\x2e\x21\x89"
+ "\xed\x0f\xb4\x40\xd1\x87\xf3\x82\x27\x0c\xb4\x55\xf2\x1d\xd1\x85";
+ one_million_a_hash =
+ "\x3c\x3a\x87\x6d\xa1\x40\x34\xab\x60\x62\x7c\x07\x7b\xb9\x8f\x7e"
+ "\x12\x0a\x2a\x53\x70\x21\x2d\xff\xb3\x38\x5a\x18\xd4\xf3\x88\x59"
+ "\xed\x31\x1d\x0a\x9d\x51\x41\xce\x9c\xc5\xc6\x6e\xe6\x89\xb2\x66"
+ "\xa8\xaa\x18\xac\xe8\x28\x2a\x0e\x0d\xb5\x96\xc9\x0b\x0a\x7b\x87";
+ hash_len = 64;
+ break;
+
+ case GCRY_MD_SHAKE128:
+ short_hash =
+ "\x58\x81\x09\x2d\xd8\x18\xbf\x5c\xf8\xa3\xdd\xb7\x93\xfb\xcb\xa7"
+ "\x40\x97\xd5\xc5\x26\xa6\xd3\x5f\x97\xb8\x33\x51\x94\x0f\x2c\xc8";
+ long_hash =
+ "\x7b\x6d\xf6\xff\x18\x11\x73\xb6\xd7\x89\x8d\x7f\xf6\x3f\xb0\x7b"
+ "\x7c\x23\x7d\xaf\x47\x1a\x5a\xe5\x60\x2a\xdb\xcc\xef\x9c\xcf\x4b";
+ one_million_a_hash =
+ "\x9d\x22\x2c\x79\xc4\xff\x9d\x09\x2c\xf6\xca\x86\x14\x3a\xa4\x11"
+ "\xe3\x69\x97\x38\x08\xef\x97\x09\x32\x55\x82\x6c\x55\x72\xef\x58";
+ hash_len = 32;
+ break;
+
+ case GCRY_MD_SHAKE256:
+ short_hash =
+ "\x48\x33\x66\x60\x13\x60\xa8\x77\x1c\x68\x63\x08\x0c\xc4\x11\x4d"
+ "\x8d\xb4\x45\x30\xf8\xf1\xe1\xee\x4f\x94\xea\x37\xe7\x8b\x57\x39";
+ long_hash =
+ "\x98\xbe\x04\x51\x6c\x04\xcc\x73\x59\x3f\xef\x3e\xd0\x35\x2e\xa9"
+ "\xf6\x44\x39\x42\xd6\x95\x0e\x29\xa3\x72\xa6\x81\xc3\xde\xaf\x45";
+ one_million_a_hash =
+ "\x35\x78\xa7\xa4\xca\x91\x37\x56\x9c\xdf\x76\xed\x61\x7d\x31\xbb"
+ "\x99\x4f\xca\x9c\x1b\xbf\x8b\x18\x40\x13\xde\x82\x34\xdf\xd1\x3a";
+ hash_len = 32;
+ break;
+ }
+
+ what = "short string";
+ errtxt = _gcry_hash_selftest_check_one (algo, 0, "abc", 3, short_hash,
+ hash_len);
+ if (errtxt)
+ goto failed;
+
+ if (extended)
+ {
+ what = "long string";
+ errtxt = _gcry_hash_selftest_check_one
+ (algo, 0,
+ "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
+ "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
+ long_hash, hash_len);
+ if (errtxt)
+ goto failed;
+
+ what = "one million \"a\"";
+ errtxt = _gcry_hash_selftest_check_one (algo, 1, NULL, 0,
+ one_million_a_hash, hash_len);
+ if (errtxt)
+ goto failed;
+ }
+
+ return 0; /* Succeeded. */
+
+failed:
+ if (report)
+ report ("digest", algo, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Run a full self-test for ALGO and return 0 on success. */
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+ gpg_err_code_t ec;
+
+ switch (algo)
+ {
+ case GCRY_MD_SHA3_224:
+ case GCRY_MD_SHA3_256:
+ case GCRY_MD_SHA3_384:
+ case GCRY_MD_SHA3_512:
+ case GCRY_MD_SHAKE128:
+ case GCRY_MD_SHAKE256:
+ ec = selftests_keccak (algo, extended, report);
+ break;
+ default:
+ ec = GPG_ERR_DIGEST_ALGO;
+ break;
+ }
+
+ return ec;
+}
+
+
+
+
+static byte sha3_224_asn[] = { 0x30 };
+static gcry_md_oid_spec_t oid_spec_sha3_224[] =
+ {
+ { "2.16.840.1.101.3.4.2.7" },
+ /* PKCS#1 sha3_224WithRSAEncryption */
+ { "?" },
+ { NULL }
+ };
+static byte sha3_256_asn[] = { 0x30 };
+static gcry_md_oid_spec_t oid_spec_sha3_256[] =
+ {
+ { "2.16.840.1.101.3.4.2.8" },
+ /* PKCS#1 sha3_256WithRSAEncryption */
+ { "?" },
+ { NULL }
+ };
+static byte sha3_384_asn[] = { 0x30 };
+static gcry_md_oid_spec_t oid_spec_sha3_384[] =
+ {
+ { "2.16.840.1.101.3.4.2.9" },
+ /* PKCS#1 sha3_384WithRSAEncryption */
+ { "?" },
+ { NULL }
+ };
+static byte sha3_512_asn[] = { 0x30 };
+static gcry_md_oid_spec_t oid_spec_sha3_512[] =
+ {
+ { "2.16.840.1.101.3.4.2.10" },
+ /* PKCS#1 sha3_512WithRSAEncryption */
+ { "?" },
+ { NULL }
+ };
+static byte shake128_asn[] = { 0x30 };
+static gcry_md_oid_spec_t oid_spec_shake128[] =
+ {
+ { "2.16.840.1.101.3.4.2.11" },
+ /* PKCS#1 shake128WithRSAEncryption */
+ { "?" },
+ { NULL }
+ };
+static byte shake256_asn[] = { 0x30 };
+static gcry_md_oid_spec_t oid_spec_shake256[] =
+ {
+ { "2.16.840.1.101.3.4.2.12" },
+ /* PKCS#1 shake256WithRSAEncryption */
+ { "?" },
+ { NULL }
+ };
+
+gcry_md_spec_t _gcry_digest_spec_sha3_224 =
+ {
+ GCRY_MD_SHA3_224, {0, 1},
+ "SHA3-224", sha3_224_asn, DIM (sha3_224_asn), oid_spec_sha3_224, 28,
+ sha3_224_init, keccak_write, keccak_final, keccak_read, NULL,
+ _gcry_sha3_224_hash_buffer, _gcry_sha3_224_hash_buffers,
+ sizeof (KECCAK_CONTEXT),
+ run_selftests
+ };
+gcry_md_spec_t _gcry_digest_spec_sha3_256 =
+ {
+ GCRY_MD_SHA3_256, {0, 1},
+ "SHA3-256", sha3_256_asn, DIM (sha3_256_asn), oid_spec_sha3_256, 32,
+ sha3_256_init, keccak_write, keccak_final, keccak_read, NULL,
+ _gcry_sha3_256_hash_buffer, _gcry_sha3_256_hash_buffers,
+ sizeof (KECCAK_CONTEXT),
+ run_selftests
+ };
+gcry_md_spec_t _gcry_digest_spec_sha3_384 =
+ {
+ GCRY_MD_SHA3_384, {0, 1},
+ "SHA3-384", sha3_384_asn, DIM (sha3_384_asn), oid_spec_sha3_384, 48,
+ sha3_384_init, keccak_write, keccak_final, keccak_read, NULL,
+ _gcry_sha3_384_hash_buffer, _gcry_sha3_384_hash_buffers,
+ sizeof (KECCAK_CONTEXT),
+ run_selftests
+ };
+gcry_md_spec_t _gcry_digest_spec_sha3_512 =
+ {
+ GCRY_MD_SHA3_512, {0, 1},
+ "SHA3-512", sha3_512_asn, DIM (sha3_512_asn), oid_spec_sha3_512, 64,
+ sha3_512_init, keccak_write, keccak_final, keccak_read, NULL,
+ _gcry_sha3_512_hash_buffer, _gcry_sha3_512_hash_buffers,
+ sizeof (KECCAK_CONTEXT),
+ run_selftests
+ };
+gcry_md_spec_t _gcry_digest_spec_shake128 =
+ {
+ GCRY_MD_SHAKE128, {0, 1},
+ "SHAKE128", shake128_asn, DIM (shake128_asn), oid_spec_shake128, 0,
+ shake128_init, keccak_write, keccak_final, NULL, keccak_extract,
+ NULL, NULL,
+ sizeof (KECCAK_CONTEXT),
+ run_selftests
+ };
+gcry_md_spec_t _gcry_digest_spec_shake256 =
+ {
+ GCRY_MD_SHAKE256, {0, 1},
+ "SHAKE256", shake256_asn, DIM (shake256_asn), oid_spec_shake256, 0,
+ shake256_init, keccak_write, keccak_final, NULL, keccak_extract,
+ NULL, NULL,
+ sizeof (KECCAK_CONTEXT),
+ run_selftests
+ };
diff --git a/comm/third_party/libgcrypt/cipher/keccak_permute_32.h b/comm/third_party/libgcrypt/cipher/keccak_permute_32.h
new file mode 100644
index 0000000000..1ce42a42fc
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/keccak_permute_32.h
@@ -0,0 +1,536 @@
+/* keccak_permute_32.h - Keccak permute function (simple 32bit bit-interleaved)
+ * Copyright (C) 2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 "keccakc1024/simple32bi/
+ * Keccak-simple32BI.c" implementation by Ronny Van Keer from SUPERCOP toolkit
+ * package.
+ */
+
+/* Function that computes the Keccak-f[1600] permutation on the given state. */
+static unsigned int
+KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd)
+{
+ const u32 *round_consts = round_consts_32bit;
+ const u32 *round_consts_end = round_consts_32bit + 2 * 24;
+ u32 Aba0, Abe0, Abi0, Abo0, Abu0;
+ u32 Aba1, Abe1, Abi1, Abo1, Abu1;
+ u32 Aga0, Age0, Agi0, Ago0, Agu0;
+ u32 Aga1, Age1, Agi1, Ago1, Agu1;
+ u32 Aka0, Ake0, Aki0, Ako0, Aku0;
+ u32 Aka1, Ake1, Aki1, Ako1, Aku1;
+ u32 Ama0, Ame0, Ami0, Amo0, Amu0;
+ u32 Ama1, Ame1, Ami1, Amo1, Amu1;
+ u32 Asa0, Ase0, Asi0, Aso0, Asu0;
+ u32 Asa1, Ase1, Asi1, Aso1, Asu1;
+ u32 BCa0, BCe0, BCi0, BCo0, BCu0;
+ u32 BCa1, BCe1, BCi1, BCo1, BCu1;
+ u32 Da0, De0, Di0, Do0, Du0;
+ u32 Da1, De1, Di1, Do1, Du1;
+ u32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0;
+ u32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1;
+ u32 Ega0, Ege0, Egi0, Ego0, Egu0;
+ u32 Ega1, Ege1, Egi1, Ego1, Egu1;
+ u32 Eka0, Eke0, Eki0, Eko0, Eku0;
+ u32 Eka1, Eke1, Eki1, Eko1, Eku1;
+ u32 Ema0, Eme0, Emi0, Emo0, Emu0;
+ u32 Ema1, Eme1, Emi1, Emo1, Emu1;
+ u32 Esa0, Ese0, Esi0, Eso0, Esu0;
+ u32 Esa1, Ese1, Esi1, Eso1, Esu1;
+ u32 *state = hd->u.state32bi;
+
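+  /* Bit-interleaved layout: state[2*i] holds the even-numbered bits and
+   * state[2*i + 1] the odd-numbered bits of 64-bit lane i, so each 64-bit
+   * rotation is carried out as two 32-bit rotations.  */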
+ Aba0 = state[0];
+ Aba1 = state[1];
+ Abe0 = state[2];
+ Abe1 = state[3];
+ Abi0 = state[4];
+ Abi1 = state[5];
+ Abo0 = state[6];
+ Abo1 = state[7];
+ Abu0 = state[8];
+ Abu1 = state[9];
+ Aga0 = state[10];
+ Aga1 = state[11];
+ Age0 = state[12];
+ Age1 = state[13];
+ Agi0 = state[14];
+ Agi1 = state[15];
+ Ago0 = state[16];
+ Ago1 = state[17];
+ Agu0 = state[18];
+ Agu1 = state[19];
+ Aka0 = state[20];
+ Aka1 = state[21];
+ Ake0 = state[22];
+ Ake1 = state[23];
+ Aki0 = state[24];
+ Aki1 = state[25];
+ Ako0 = state[26];
+ Ako1 = state[27];
+ Aku0 = state[28];
+ Aku1 = state[29];
+ Ama0 = state[30];
+ Ama1 = state[31];
+ Ame0 = state[32];
+ Ame1 = state[33];
+ Ami0 = state[34];
+ Ami1 = state[35];
+ Amo0 = state[36];
+ Amo1 = state[37];
+ Amu0 = state[38];
+ Amu1 = state[39];
+ Asa0 = state[40];
+ Asa1 = state[41];
+ Ase0 = state[42];
+ Ase1 = state[43];
+ Asi0 = state[44];
+ Asi1 = state[45];
+ Aso0 = state[46];
+ Aso1 = state[47];
+ Asu0 = state[48];
+ Asu1 = state[49];
+
+ do
+ {
+ /* prepareTheta */
+ BCa0 = Aba0 ^ Aga0 ^ Aka0 ^ Ama0 ^ Asa0;
+ BCa1 = Aba1 ^ Aga1 ^ Aka1 ^ Ama1 ^ Asa1;
+ BCe0 = Abe0 ^ Age0 ^ Ake0 ^ Ame0 ^ Ase0;
+ BCe1 = Abe1 ^ Age1 ^ Ake1 ^ Ame1 ^ Ase1;
+ BCi0 = Abi0 ^ Agi0 ^ Aki0 ^ Ami0 ^ Asi0;
+ BCi1 = Abi1 ^ Agi1 ^ Aki1 ^ Ami1 ^ Asi1;
+ BCo0 = Abo0 ^ Ago0 ^ Ako0 ^ Amo0 ^ Aso0;
+ BCo1 = Abo1 ^ Ago1 ^ Ako1 ^ Amo1 ^ Aso1;
+ BCu0 = Abu0 ^ Agu0 ^ Aku0 ^ Amu0 ^ Asu0;
+ BCu1 = Abu1 ^ Agu1 ^ Aku1 ^ Amu1 ^ Asu1;
+
+ /* thetaRhoPiChiIota(round , A, E) */
+ Da0 = BCu0 ^ ROL32(BCe1, 1);
+ Da1 = BCu1 ^ BCe0;
+ De0 = BCa0 ^ ROL32(BCi1, 1);
+ De1 = BCa1 ^ BCi0;
+ Di0 = BCe0 ^ ROL32(BCo1, 1);
+ Di1 = BCe1 ^ BCo0;
+ Do0 = BCi0 ^ ROL32(BCu1, 1);
+ Do1 = BCi1 ^ BCu0;
+ Du0 = BCo0 ^ ROL32(BCa1, 1);
+ Du1 = BCo1 ^ BCa0;
+
+ Aba0 ^= Da0;
+ BCa0 = Aba0;
+ Age0 ^= De0;
+ BCe0 = ROL32(Age0, 22);
+ Aki1 ^= Di1;
+ BCi0 = ROL32(Aki1, 22);
+ Amo1 ^= Do1;
+ BCo0 = ROL32(Amo1, 11);
+ Asu0 ^= Du0;
+ BCu0 = ROL32(Asu0, 7);
+ Eba0 = BCa0 ^ ANDN32(BCe0, BCi0);
+ Eba0 ^= *(round_consts++);
+ Ebe0 = BCe0 ^ ANDN32(BCi0, BCo0);
+ Ebi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+ Ebo0 = BCo0 ^ ANDN32(BCu0, BCa0);
+ Ebu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+ Aba1 ^= Da1;
+ BCa1 = Aba1;
+ Age1 ^= De1;
+ BCe1 = ROL32(Age1, 22);
+ Aki0 ^= Di0;
+ BCi1 = ROL32(Aki0, 21);
+ Amo0 ^= Do0;
+ BCo1 = ROL32(Amo0, 10);
+ Asu1 ^= Du1;
+ BCu1 = ROL32(Asu1, 7);
+ Eba1 = BCa1 ^ ANDN32(BCe1, BCi1);
+ Eba1 ^= *(round_consts++);
+ Ebe1 = BCe1 ^ ANDN32(BCi1, BCo1);
+ Ebi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+ Ebo1 = BCo1 ^ ANDN32(BCu1, BCa1);
+ Ebu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+ Abo0 ^= Do0;
+ BCa0 = ROL32(Abo0, 14);
+ Agu0 ^= Du0;
+ BCe0 = ROL32(Agu0, 10);
+ Aka1 ^= Da1;
+ BCi0 = ROL32(Aka1, 2);
+ Ame1 ^= De1;
+ BCo0 = ROL32(Ame1, 23);
+ Asi1 ^= Di1;
+ BCu0 = ROL32(Asi1, 31);
+ Ega0 = BCa0 ^ ANDN32(BCe0, BCi0);
+ Ege0 = BCe0 ^ ANDN32(BCi0, BCo0);
+ Egi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+ Ego0 = BCo0 ^ ANDN32(BCu0, BCa0);
+ Egu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+ Abo1 ^= Do1;
+ BCa1 = ROL32(Abo1, 14);
+ Agu1 ^= Du1;
+ BCe1 = ROL32(Agu1, 10);
+ Aka0 ^= Da0;
+ BCi1 = ROL32(Aka0, 1);
+ Ame0 ^= De0;
+ BCo1 = ROL32(Ame0, 22);
+ Asi0 ^= Di0;
+ BCu1 = ROL32(Asi0, 30);
+ Ega1 = BCa1 ^ ANDN32(BCe1, BCi1);
+ Ege1 = BCe1 ^ ANDN32(BCi1, BCo1);
+ Egi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+ Ego1 = BCo1 ^ ANDN32(BCu1, BCa1);
+ Egu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+ Abe1 ^= De1;
+ BCa0 = ROL32(Abe1, 1);
+ Agi0 ^= Di0;
+ BCe0 = ROL32(Agi0, 3);
+ Ako1 ^= Do1;
+ BCi0 = ROL32(Ako1, 13);
+ Amu0 ^= Du0;
+ BCo0 = ROL32(Amu0, 4);
+ Asa0 ^= Da0;
+ BCu0 = ROL32(Asa0, 9);
+ Eka0 = BCa0 ^ ANDN32(BCe0, BCi0);
+ Eke0 = BCe0 ^ ANDN32(BCi0, BCo0);
+ Eki0 = BCi0 ^ ANDN32(BCo0, BCu0);
+ Eko0 = BCo0 ^ ANDN32(BCu0, BCa0);
+ Eku0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+ Abe0 ^= De0;
+ BCa1 = Abe0;
+ Agi1 ^= Di1;
+ BCe1 = ROL32(Agi1, 3);
+ Ako0 ^= Do0;
+ BCi1 = ROL32(Ako0, 12);
+ Amu1 ^= Du1;
+ BCo1 = ROL32(Amu1, 4);
+ Asa1 ^= Da1;
+ BCu1 = ROL32(Asa1, 9);
+ Eka1 = BCa1 ^ ANDN32(BCe1, BCi1);
+ Eke1 = BCe1 ^ ANDN32(BCi1, BCo1);
+ Eki1 = BCi1 ^ ANDN32(BCo1, BCu1);
+ Eko1 = BCo1 ^ ANDN32(BCu1, BCa1);
+ Eku1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+ Abu1 ^= Du1;
+ BCa0 = ROL32(Abu1, 14);
+ Aga0 ^= Da0;
+ BCe0 = ROL32(Aga0, 18);
+ Ake0 ^= De0;
+ BCi0 = ROL32(Ake0, 5);
+ Ami1 ^= Di1;
+ BCo0 = ROL32(Ami1, 8);
+ Aso0 ^= Do0;
+ BCu0 = ROL32(Aso0, 28);
+ Ema0 = BCa0 ^ ANDN32(BCe0, BCi0);
+ Eme0 = BCe0 ^ ANDN32(BCi0, BCo0);
+ Emi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+ Emo0 = BCo0 ^ ANDN32(BCu0, BCa0);
+ Emu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+ Abu0 ^= Du0;
+ BCa1 = ROL32(Abu0, 13);
+ Aga1 ^= Da1;
+ BCe1 = ROL32(Aga1, 18);
+ Ake1 ^= De1;
+ BCi1 = ROL32(Ake1, 5);
+ Ami0 ^= Di0;
+ BCo1 = ROL32(Ami0, 7);
+ Aso1 ^= Do1;
+ BCu1 = ROL32(Aso1, 28);
+ Ema1 = BCa1 ^ ANDN32(BCe1, BCi1);
+ Eme1 = BCe1 ^ ANDN32(BCi1, BCo1);
+ Emi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+ Emo1 = BCo1 ^ ANDN32(BCu1, BCa1);
+ Emu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+ Abi0 ^= Di0;
+ BCa0 = ROL32(Abi0, 31);
+ Ago1 ^= Do1;
+ BCe0 = ROL32(Ago1, 28);
+ Aku1 ^= Du1;
+ BCi0 = ROL32(Aku1, 20);
+ Ama1 ^= Da1;
+ BCo0 = ROL32(Ama1, 21);
+ Ase0 ^= De0;
+ BCu0 = ROL32(Ase0, 1);
+ Esa0 = BCa0 ^ ANDN32(BCe0, BCi0);
+ Ese0 = BCe0 ^ ANDN32(BCi0, BCo0);
+ Esi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+ Eso0 = BCo0 ^ ANDN32(BCu0, BCa0);
+ Esu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+ Abi1 ^= Di1;
+ BCa1 = ROL32(Abi1, 31);
+ Ago0 ^= Do0;
+ BCe1 = ROL32(Ago0, 27);
+ Aku0 ^= Du0;
+ BCi1 = ROL32(Aku0, 19);
+ Ama0 ^= Da0;
+ BCo1 = ROL32(Ama0, 20);
+ Ase1 ^= De1;
+ BCu1 = ROL32(Ase1, 1);
+ Esa1 = BCa1 ^ ANDN32(BCe1, BCi1);
+ Ese1 = BCe1 ^ ANDN32(BCi1, BCo1);
+ Esi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+ Eso1 = BCo1 ^ ANDN32(BCu1, BCa1);
+ Esu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+ /* prepareTheta */
+ BCa0 = Eba0 ^ Ega0 ^ Eka0 ^ Ema0 ^ Esa0;
+ BCa1 = Eba1 ^ Ega1 ^ Eka1 ^ Ema1 ^ Esa1;
+ BCe0 = Ebe0 ^ Ege0 ^ Eke0 ^ Eme0 ^ Ese0;
+ BCe1 = Ebe1 ^ Ege1 ^ Eke1 ^ Eme1 ^ Ese1;
+ BCi0 = Ebi0 ^ Egi0 ^ Eki0 ^ Emi0 ^ Esi0;
+ BCi1 = Ebi1 ^ Egi1 ^ Eki1 ^ Emi1 ^ Esi1;
+ BCo0 = Ebo0 ^ Ego0 ^ Eko0 ^ Emo0 ^ Eso0;
+ BCo1 = Ebo1 ^ Ego1 ^ Eko1 ^ Emo1 ^ Eso1;
+ BCu0 = Ebu0 ^ Egu0 ^ Eku0 ^ Emu0 ^ Esu0;
+ BCu1 = Ebu1 ^ Egu1 ^ Eku1 ^ Emu1 ^ Esu1;
+
+ /* thetaRhoPiChiIota(round+1, E, A) */
+ Da0 = BCu0 ^ ROL32(BCe1, 1);
+ Da1 = BCu1 ^ BCe0;
+ De0 = BCa0 ^ ROL32(BCi1, 1);
+ De1 = BCa1 ^ BCi0;
+ Di0 = BCe0 ^ ROL32(BCo1, 1);
+ Di1 = BCe1 ^ BCo0;
+ Do0 = BCi0 ^ ROL32(BCu1, 1);
+ Do1 = BCi1 ^ BCu0;
+ Du0 = BCo0 ^ ROL32(BCa1, 1);
+ Du1 = BCo1 ^ BCa0;
+
+ Eba0 ^= Da0;
+ BCa0 = Eba0;
+ Ege0 ^= De0;
+ BCe0 = ROL32(Ege0, 22);
+ Eki1 ^= Di1;
+ BCi0 = ROL32(Eki1, 22);
+ Emo1 ^= Do1;
+ BCo0 = ROL32(Emo1, 11);
+ Esu0 ^= Du0;
+ BCu0 = ROL32(Esu0, 7);
+ Aba0 = BCa0 ^ ANDN32(BCe0, BCi0);
+ Aba0 ^= *(round_consts++);
+ Abe0 = BCe0 ^ ANDN32(BCi0, BCo0);
+ Abi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+ Abo0 = BCo0 ^ ANDN32(BCu0, BCa0);
+ Abu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+ Eba1 ^= Da1;
+ BCa1 = Eba1;
+ Ege1 ^= De1;
+ BCe1 = ROL32(Ege1, 22);
+ Eki0 ^= Di0;
+ BCi1 = ROL32(Eki0, 21);
+ Emo0 ^= Do0;
+ BCo1 = ROL32(Emo0, 10);
+ Esu1 ^= Du1;
+ BCu1 = ROL32(Esu1, 7);
+ Aba1 = BCa1 ^ ANDN32(BCe1, BCi1);
+ Aba1 ^= *(round_consts++);
+ Abe1 = BCe1 ^ ANDN32(BCi1, BCo1);
+ Abi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+ Abo1 = BCo1 ^ ANDN32(BCu1, BCa1);
+ Abu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+ Ebo0 ^= Do0;
+ BCa0 = ROL32(Ebo0, 14);
+ Egu0 ^= Du0;
+ BCe0 = ROL32(Egu0, 10);
+ Eka1 ^= Da1;
+ BCi0 = ROL32(Eka1, 2);
+ Eme1 ^= De1;
+ BCo0 = ROL32(Eme1, 23);
+ Esi1 ^= Di1;
+ BCu0 = ROL32(Esi1, 31);
+ Aga0 = BCa0 ^ ANDN32(BCe0, BCi0);
+ Age0 = BCe0 ^ ANDN32(BCi0, BCo0);
+ Agi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+ Ago0 = BCo0 ^ ANDN32(BCu0, BCa0);
+ Agu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+ Ebo1 ^= Do1;
+ BCa1 = ROL32(Ebo1, 14);
+ Egu1 ^= Du1;
+ BCe1 = ROL32(Egu1, 10);
+ Eka0 ^= Da0;
+ BCi1 = ROL32(Eka0, 1);
+ Eme0 ^= De0;
+ BCo1 = ROL32(Eme0, 22);
+ Esi0 ^= Di0;
+ BCu1 = ROL32(Esi0, 30);
+ Aga1 = BCa1 ^ ANDN32(BCe1, BCi1);
+ Age1 = BCe1 ^ ANDN32(BCi1, BCo1);
+ Agi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+ Ago1 = BCo1 ^ ANDN32(BCu1, BCa1);
+ Agu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+ Ebe1 ^= De1;
+ BCa0 = ROL32(Ebe1, 1);
+ Egi0 ^= Di0;
+ BCe0 = ROL32(Egi0, 3);
+ Eko1 ^= Do1;
+ BCi0 = ROL32(Eko1, 13);
+ Emu0 ^= Du0;
+ BCo0 = ROL32(Emu0, 4);
+ Esa0 ^= Da0;
+ BCu0 = ROL32(Esa0, 9);
+ Aka0 = BCa0 ^ ANDN32(BCe0, BCi0);
+ Ake0 = BCe0 ^ ANDN32(BCi0, BCo0);
+ Aki0 = BCi0 ^ ANDN32(BCo0, BCu0);
+ Ako0 = BCo0 ^ ANDN32(BCu0, BCa0);
+ Aku0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+ Ebe0 ^= De0;
+ BCa1 = Ebe0;
+ Egi1 ^= Di1;
+ BCe1 = ROL32(Egi1, 3);
+ Eko0 ^= Do0;
+ BCi1 = ROL32(Eko0, 12);
+ Emu1 ^= Du1;
+ BCo1 = ROL32(Emu1, 4);
+ Esa1 ^= Da1;
+ BCu1 = ROL32(Esa1, 9);
+ Aka1 = BCa1 ^ ANDN32(BCe1, BCi1);
+ Ake1 = BCe1 ^ ANDN32(BCi1, BCo1);
+ Aki1 = BCi1 ^ ANDN32(BCo1, BCu1);
+ Ako1 = BCo1 ^ ANDN32(BCu1, BCa1);
+ Aku1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+ Ebu1 ^= Du1;
+ BCa0 = ROL32(Ebu1, 14);
+ Ega0 ^= Da0;
+ BCe0 = ROL32(Ega0, 18);
+ Eke0 ^= De0;
+ BCi0 = ROL32(Eke0, 5);
+ Emi1 ^= Di1;
+ BCo0 = ROL32(Emi1, 8);
+ Eso0 ^= Do0;
+ BCu0 = ROL32(Eso0, 28);
+ Ama0 = BCa0 ^ ANDN32(BCe0, BCi0);
+ Ame0 = BCe0 ^ ANDN32(BCi0, BCo0);
+ Ami0 = BCi0 ^ ANDN32(BCo0, BCu0);
+ Amo0 = BCo0 ^ ANDN32(BCu0, BCa0);
+ Amu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+ Ebu0 ^= Du0;
+ BCa1 = ROL32(Ebu0, 13);
+ Ega1 ^= Da1;
+ BCe1 = ROL32(Ega1, 18);
+ Eke1 ^= De1;
+ BCi1 = ROL32(Eke1, 5);
+ Emi0 ^= Di0;
+ BCo1 = ROL32(Emi0, 7);
+ Eso1 ^= Do1;
+ BCu1 = ROL32(Eso1, 28);
+ Ama1 = BCa1 ^ ANDN32(BCe1, BCi1);
+ Ame1 = BCe1 ^ ANDN32(BCi1, BCo1);
+ Ami1 = BCi1 ^ ANDN32(BCo1, BCu1);
+ Amo1 = BCo1 ^ ANDN32(BCu1, BCa1);
+ Amu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+ Ebi0 ^= Di0;
+ BCa0 = ROL32(Ebi0, 31);
+ Ego1 ^= Do1;
+ BCe0 = ROL32(Ego1, 28);
+ Eku1 ^= Du1;
+ BCi0 = ROL32(Eku1, 20);
+ Ema1 ^= Da1;
+ BCo0 = ROL32(Ema1, 21);
+ Ese0 ^= De0;
+ BCu0 = ROL32(Ese0, 1);
+ Asa0 = BCa0 ^ ANDN32(BCe0, BCi0);
+ Ase0 = BCe0 ^ ANDN32(BCi0, BCo0);
+ Asi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+ Aso0 = BCo0 ^ ANDN32(BCu0, BCa0);
+ Asu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+ Ebi1 ^= Di1;
+ BCa1 = ROL32(Ebi1, 31);
+ Ego0 ^= Do0;
+ BCe1 = ROL32(Ego0, 27);
+ Eku0 ^= Du0;
+ BCi1 = ROL32(Eku0, 19);
+ Ema0 ^= Da0;
+ BCo1 = ROL32(Ema0, 20);
+ Ese1 ^= De1;
+ BCu1 = ROL32(Ese1, 1);
+ Asa1 = BCa1 ^ ANDN32(BCe1, BCi1);
+ Ase1 = BCe1 ^ ANDN32(BCi1, BCo1);
+ Asi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+ Aso1 = BCo1 ^ ANDN32(BCu1, BCa1);
+ Asu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+ }
+ while (round_consts < round_consts_end);
+
+ state[0] = Aba0;
+ state[1] = Aba1;
+ state[2] = Abe0;
+ state[3] = Abe1;
+ state[4] = Abi0;
+ state[5] = Abi1;
+ state[6] = Abo0;
+ state[7] = Abo1;
+ state[8] = Abu0;
+ state[9] = Abu1;
+ state[10] = Aga0;
+ state[11] = Aga1;
+ state[12] = Age0;
+ state[13] = Age1;
+ state[14] = Agi0;
+ state[15] = Agi1;
+ state[16] = Ago0;
+ state[17] = Ago1;
+ state[18] = Agu0;
+ state[19] = Agu1;
+ state[20] = Aka0;
+ state[21] = Aka1;
+ state[22] = Ake0;
+ state[23] = Ake1;
+ state[24] = Aki0;
+ state[25] = Aki1;
+ state[26] = Ako0;
+ state[27] = Ako1;
+ state[28] = Aku0;
+ state[29] = Aku1;
+ state[30] = Ama0;
+ state[31] = Ama1;
+ state[32] = Ame0;
+ state[33] = Ame1;
+ state[34] = Ami0;
+ state[35] = Ami1;
+ state[36] = Amo0;
+ state[37] = Amo1;
+ state[38] = Amu0;
+ state[39] = Amu1;
+ state[40] = Asa0;
+ state[41] = Asa1;
+ state[42] = Ase0;
+ state[43] = Ase1;
+ state[44] = Asi0;
+ state[45] = Asi1;
+ state[46] = Aso0;
+ state[47] = Aso1;
+ state[48] = Asu0;
+ state[49] = Asu1;
+
+ return sizeof(void *) * 4 + sizeof(u32) * 12 * 5 * 2;
+}
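The 32-bit permutation above keeps each 64-bit Keccak lane in bit-interleaved form: one u32 (the "0" half) holds the even-numbered bits and the other (the "1" half) the odd-numbered bits. Every 64-bit rotation of the reference permutation therefore splits into two 32-bit rotations, with the halves swapping when the rotation count is odd. A minimal sketch of that mapping, using illustrative names x0/x1 that are not part of the patch:

/* Interleaved lane: x0 = even bits, x1 = odd bits of the 64-bit lane x.
 * ROL64(x, n) then maps to:
 *   n = 2*m      ->  x0' = ROL32(x0, m),      x1' = ROL32(x1, m)
 *   n = 2*m + 1  ->  x0' = ROL32(x1, m + 1),  x1' = ROL32(x0, m)   (halves swap)
 * Example from the rho step above: ROL64(Aki, 43), with 43 = 2*21 + 1,
 * becomes BCi0 = ROL32(Aki1, 22) and BCi1 = ROL32(Aki0, 21). */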
diff --git a/comm/third_party/libgcrypt/cipher/keccak_permute_64.h b/comm/third_party/libgcrypt/cipher/keccak_permute_64.h
new file mode 100644
index 0000000000..b28c871ec1
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/keccak_permute_64.h
@@ -0,0 +1,385 @@
+/* keccak_permute_64.h - Keccak permute function (simple 64bit)
+ * Copyright (C) 2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 "keccakc1024/simple/Keccak-simple.c"
+ * implementation by Ronny Van Keer from the SUPERCOP toolkit package.
+ */
+
+/* Function that computes the Keccak-f[1600] permutation on the given state. */
+static unsigned int
+KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd)
+{
+ const u64 *round_consts = _gcry_keccak_round_consts_64bit;
+ const u64 *round_consts_end = _gcry_keccak_round_consts_64bit + 24;
+ u64 Aba, Abe, Abi, Abo, Abu;
+ u64 Aga, Age, Agi, Ago, Agu;
+ u64 Aka, Ake, Aki, Ako, Aku;
+ u64 Ama, Ame, Ami, Amo, Amu;
+ u64 Asa, Ase, Asi, Aso, Asu;
+ u64 BCa, BCe, BCi, BCo, BCu;
+ u64 Da, De, Di, Do, Du;
+ u64 Eba, Ebe, Ebi, Ebo, Ebu;
+ u64 Ega, Ege, Egi, Ego, Egu;
+ u64 Eka, Eke, Eki, Eko, Eku;
+ u64 Ema, Eme, Emi, Emo, Emu;
+ u64 Esa, Ese, Esi, Eso, Esu;
+ u64 *state = hd->u.state64;
+
+ Aba = state[0];
+ Abe = state[1];
+ Abi = state[2];
+ Abo = state[3];
+ Abu = state[4];
+ Aga = state[5];
+ Age = state[6];
+ Agi = state[7];
+ Ago = state[8];
+ Agu = state[9];
+ Aka = state[10];
+ Ake = state[11];
+ Aki = state[12];
+ Ako = state[13];
+ Aku = state[14];
+ Ama = state[15];
+ Ame = state[16];
+ Ami = state[17];
+ Amo = state[18];
+ Amu = state[19];
+ Asa = state[20];
+ Ase = state[21];
+ Asi = state[22];
+ Aso = state[23];
+ Asu = state[24];
+
+ do
+ {
+ /* prepareTheta */
+ BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa;
+ BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase;
+ BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi;
+ BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso;
+ BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu;
+
+ /* thetaRhoPiChiIotaPrepareTheta(round , A, E) */
+ Da = BCu ^ ROL64(BCe, 1);
+ De = BCa ^ ROL64(BCi, 1);
+ Di = BCe ^ ROL64(BCo, 1);
+ Do = BCi ^ ROL64(BCu, 1);
+ Du = BCo ^ ROL64(BCa, 1);
+
+ Aba ^= Da;
+ BCa = Aba;
+ Age ^= De;
+ BCe = ROL64(Age, 44);
+ Aki ^= Di;
+ BCi = ROL64(Aki, 43);
+ Amo ^= Do;
+ BCo = ROL64(Amo, 21);
+ Asu ^= Du;
+ BCu = ROL64(Asu, 14);
+ Eba = BCa ^ ANDN64(BCe, BCi);
+ Eba ^= *(round_consts++);
+ Ebe = BCe ^ ANDN64(BCi, BCo);
+ Ebi = BCi ^ ANDN64(BCo, BCu);
+ Ebo = BCo ^ ANDN64(BCu, BCa);
+ Ebu = BCu ^ ANDN64(BCa, BCe);
+
+ Abo ^= Do;
+ BCa = ROL64(Abo, 28);
+ Agu ^= Du;
+ BCe = ROL64(Agu, 20);
+ Aka ^= Da;
+ BCi = ROL64(Aka, 3);
+ Ame ^= De;
+ BCo = ROL64(Ame, 45);
+ Asi ^= Di;
+ BCu = ROL64(Asi, 61);
+ Ega = BCa ^ ANDN64(BCe, BCi);
+ Ege = BCe ^ ANDN64(BCi, BCo);
+ Egi = BCi ^ ANDN64(BCo, BCu);
+ Ego = BCo ^ ANDN64(BCu, BCa);
+ Egu = BCu ^ ANDN64(BCa, BCe);
+
+ Abe ^= De;
+ BCa = ROL64(Abe, 1);
+ Agi ^= Di;
+ BCe = ROL64(Agi, 6);
+ Ako ^= Do;
+ BCi = ROL64(Ako, 25);
+ Amu ^= Du;
+ BCo = ROL64(Amu, 8);
+ Asa ^= Da;
+ BCu = ROL64(Asa, 18);
+ Eka = BCa ^ ANDN64(BCe, BCi);
+ Eke = BCe ^ ANDN64(BCi, BCo);
+ Eki = BCi ^ ANDN64(BCo, BCu);
+ Eko = BCo ^ ANDN64(BCu, BCa);
+ Eku = BCu ^ ANDN64(BCa, BCe);
+
+ Abu ^= Du;
+ BCa = ROL64(Abu, 27);
+ Aga ^= Da;
+ BCe = ROL64(Aga, 36);
+ Ake ^= De;
+ BCi = ROL64(Ake, 10);
+ Ami ^= Di;
+ BCo = ROL64(Ami, 15);
+ Aso ^= Do;
+ BCu = ROL64(Aso, 56);
+ Ema = BCa ^ ANDN64(BCe, BCi);
+ Eme = BCe ^ ANDN64(BCi, BCo);
+ Emi = BCi ^ ANDN64(BCo, BCu);
+ Emo = BCo ^ ANDN64(BCu, BCa);
+ Emu = BCu ^ ANDN64(BCa, BCe);
+
+ Abi ^= Di;
+ BCa = ROL64(Abi, 62);
+ Ago ^= Do;
+ BCe = ROL64(Ago, 55);
+ Aku ^= Du;
+ BCi = ROL64(Aku, 39);
+ Ama ^= Da;
+ BCo = ROL64(Ama, 41);
+ Ase ^= De;
+ BCu = ROL64(Ase, 2);
+ Esa = BCa ^ ANDN64(BCe, BCi);
+ Ese = BCe ^ ANDN64(BCi, BCo);
+ Esi = BCi ^ ANDN64(BCo, BCu);
+ Eso = BCo ^ ANDN64(BCu, BCa);
+ Esu = BCu ^ ANDN64(BCa, BCe);
+
+ /* prepareTheta */
+ BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa;
+ BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese;
+ BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi;
+ BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso;
+ BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu;
+
+ /* thetaRhoPiChiIotaPrepareTheta(round+1, E, A) */
+ Da = BCu ^ ROL64(BCe, 1);
+ De = BCa ^ ROL64(BCi, 1);
+ Di = BCe ^ ROL64(BCo, 1);
+ Do = BCi ^ ROL64(BCu, 1);
+ Du = BCo ^ ROL64(BCa, 1);
+
+ Eba ^= Da;
+ BCa = Eba;
+ Ege ^= De;
+ BCe = ROL64(Ege, 44);
+ Eki ^= Di;
+ BCi = ROL64(Eki, 43);
+ Emo ^= Do;
+ BCo = ROL64(Emo, 21);
+ Esu ^= Du;
+ BCu = ROL64(Esu, 14);
+ Aba = BCa ^ ANDN64(BCe, BCi);
+ Aba ^= *(round_consts++);
+ Abe = BCe ^ ANDN64(BCi, BCo);
+ Abi = BCi ^ ANDN64(BCo, BCu);
+ Abo = BCo ^ ANDN64(BCu, BCa);
+ Abu = BCu ^ ANDN64(BCa, BCe);
+
+ Ebo ^= Do;
+ BCa = ROL64(Ebo, 28);
+ Egu ^= Du;
+ BCe = ROL64(Egu, 20);
+ Eka ^= Da;
+ BCi = ROL64(Eka, 3);
+ Eme ^= De;
+ BCo = ROL64(Eme, 45);
+ Esi ^= Di;
+ BCu = ROL64(Esi, 61);
+ Aga = BCa ^ ANDN64(BCe, BCi);
+ Age = BCe ^ ANDN64(BCi, BCo);
+ Agi = BCi ^ ANDN64(BCo, BCu);
+ Ago = BCo ^ ANDN64(BCu, BCa);
+ Agu = BCu ^ ANDN64(BCa, BCe);
+
+ Ebe ^= De;
+ BCa = ROL64(Ebe, 1);
+ Egi ^= Di;
+ BCe = ROL64(Egi, 6);
+ Eko ^= Do;
+ BCi = ROL64(Eko, 25);
+ Emu ^= Du;
+ BCo = ROL64(Emu, 8);
+ Esa ^= Da;
+ BCu = ROL64(Esa, 18);
+ Aka = BCa ^ ANDN64(BCe, BCi);
+ Ake = BCe ^ ANDN64(BCi, BCo);
+ Aki = BCi ^ ANDN64(BCo, BCu);
+ Ako = BCo ^ ANDN64(BCu, BCa);
+ Aku = BCu ^ ANDN64(BCa, BCe);
+
+ Ebu ^= Du;
+ BCa = ROL64(Ebu, 27);
+ Ega ^= Da;
+ BCe = ROL64(Ega, 36);
+ Eke ^= De;
+ BCi = ROL64(Eke, 10);
+ Emi ^= Di;
+ BCo = ROL64(Emi, 15);
+ Eso ^= Do;
+ BCu = ROL64(Eso, 56);
+ Ama = BCa ^ ANDN64(BCe, BCi);
+ Ame = BCe ^ ANDN64(BCi, BCo);
+ Ami = BCi ^ ANDN64(BCo, BCu);
+ Amo = BCo ^ ANDN64(BCu, BCa);
+ Amu = BCu ^ ANDN64(BCa, BCe);
+
+ Ebi ^= Di;
+ BCa = ROL64(Ebi, 62);
+ Ego ^= Do;
+ BCe = ROL64(Ego, 55);
+ Eku ^= Du;
+ BCi = ROL64(Eku, 39);
+ Ema ^= Da;
+ BCo = ROL64(Ema, 41);
+ Ese ^= De;
+ BCu = ROL64(Ese, 2);
+ Asa = BCa ^ ANDN64(BCe, BCi);
+ Ase = BCe ^ ANDN64(BCi, BCo);
+ Asi = BCi ^ ANDN64(BCo, BCu);
+ Aso = BCo ^ ANDN64(BCu, BCa);
+ Asu = BCu ^ ANDN64(BCa, BCe);
+ }
+ while (round_consts < round_consts_end);
+
+ state[0] = Aba;
+ state[1] = Abe;
+ state[2] = Abi;
+ state[3] = Abo;
+ state[4] = Abu;
+ state[5] = Aga;
+ state[6] = Age;
+ state[7] = Agi;
+ state[8] = Ago;
+ state[9] = Agu;
+ state[10] = Aka;
+ state[11] = Ake;
+ state[12] = Aki;
+ state[13] = Ako;
+ state[14] = Aku;
+ state[15] = Ama;
+ state[16] = Ame;
+ state[17] = Ami;
+ state[18] = Amo;
+ state[19] = Amu;
+ state[20] = Asa;
+ state[21] = Ase;
+ state[22] = Asi;
+ state[23] = Aso;
+ state[24] = Asu;
+
+ return sizeof(void *) * 4 + sizeof(u64) * 12 * 5;
+}
+
+static unsigned int
+KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes,
+ unsigned int nlanes, int blocklanes)
+{
+ unsigned int burn = 0;
+
+ while (nlanes)
+ {
+ switch (blocklanes)
+ {
+ case 21:
+ /* SHAKE128 */
+ while (pos == 0 && nlanes >= 21)
+ {
+ nlanes -= 21;
+ absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8;
+ absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8;
+ absorb_lanes64_4(&hd->u.state64[16], lanes); lanes += 8 * 4;
+ absorb_lanes64_1(&hd->u.state64[20], lanes); lanes += 8 * 1;
+
+ burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+ }
+ break;
+
+ case 18:
+ /* SHA3-224 */
+ while (pos == 0 && nlanes >= 18)
+ {
+ nlanes -= 18;
+ absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8;
+ absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8;
+ absorb_lanes64_2(&hd->u.state64[16], lanes); lanes += 8 * 2;
+
+ burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+ }
+ break;
+
+ case 17:
+ /* SHA3-256 & SHAKE256 */
+ while (pos == 0 && nlanes >= 17)
+ {
+ nlanes -= 17;
+ absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8;
+ absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8;
+ absorb_lanes64_1(&hd->u.state64[16], lanes); lanes += 8 * 1;
+
+ burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+ }
+ break;
+
+ case 13:
+ /* SHA3-384 */
+ while (pos == 0 && nlanes >= 13)
+ {
+ nlanes -= 13;
+ absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8;
+ absorb_lanes64_4(&hd->u.state64[8], lanes); lanes += 8 * 4;
+ absorb_lanes64_1(&hd->u.state64[12], lanes); lanes += 8 * 1;
+
+ burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+ }
+ break;
+
+ case 9:
+ /* SHA3-512 */
+ while (pos == 0 && nlanes >= 9)
+ {
+ nlanes -= 9;
+ absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8;
+ absorb_lanes64_1(&hd->u.state64[8], lanes); lanes += 8 * 1;
+
+ burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+ }
+ break;
+ }
+
+ while (nlanes)
+ {
+ hd->u.state64[pos] ^= buf_get_le64(lanes);
+ lanes += 8;
+ nlanes--;
+
+ if (++pos == blocklanes)
+ {
+ burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+ pos = 0;
+ break;
+ }
+ }
+ }
+
+ return burn;
+}
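The blocklanes values handled above (21, 18, 17, 13, 9) are the sponge rates of the SHAKE/SHA-3 variants expressed in 64-bit lanes: the rate is 1600 bits minus the capacity, and for the fixed-output SHA-3 functions the capacity is twice the digest size. A small illustrative check of that arithmetic; the helper name keccak_rate_lanes is hypothetical and not part of the patch:

/* rate_in_lanes = (1600 - capacity_bits) / 64
 *   SHA3-224 : (1600 -  448) / 64 = 18      SHA3-384 : (1600 -  768) / 64 = 13
 *   SHA3-256 : (1600 -  512) / 64 = 17      SHA3-512 : (1600 - 1024) / 64 = 9
 *   SHAKE128 : (1600 -  256) / 64 = 21      SHAKE256 : (1600 -  512) / 64 = 17 */
static unsigned int
keccak_rate_lanes (unsigned int capacity_bits)
{
  return (1600 - capacity_bits) / 64;
}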
diff --git a/comm/third_party/libgcrypt/cipher/mac-cmac.c b/comm/third_party/libgcrypt/cipher/mac-cmac.c
new file mode 100644
index 0000000000..8d5d5ca304
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/mac-cmac.c
@@ -0,0 +1,524 @@
+/* mac-cmac.c - CMAC glue for MAC API
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "./mac-internal.h"
+
+
+static int
+map_mac_algo_to_cipher (int mac_algo)
+{
+ switch (mac_algo)
+ {
+ default:
+ return GCRY_CIPHER_NONE;
+ case GCRY_MAC_CMAC_AES:
+ return GCRY_CIPHER_AES;
+ case GCRY_MAC_CMAC_3DES:
+ return GCRY_CIPHER_3DES;
+ case GCRY_MAC_CMAC_CAMELLIA:
+ return GCRY_CIPHER_CAMELLIA128;
+ case GCRY_MAC_CMAC_IDEA:
+ return GCRY_CIPHER_IDEA;
+ case GCRY_MAC_CMAC_CAST5:
+ return GCRY_CIPHER_CAST5;
+ case GCRY_MAC_CMAC_BLOWFISH:
+ return GCRY_CIPHER_BLOWFISH;
+ case GCRY_MAC_CMAC_TWOFISH:
+ return GCRY_CIPHER_TWOFISH;
+ case GCRY_MAC_CMAC_SERPENT:
+ return GCRY_CIPHER_SERPENT128;
+ case GCRY_MAC_CMAC_SEED:
+ return GCRY_CIPHER_SEED;
+ case GCRY_MAC_CMAC_RFC2268:
+ return GCRY_CIPHER_RFC2268_128;
+ case GCRY_MAC_CMAC_GOST28147:
+ return GCRY_CIPHER_GOST28147;
+ case GCRY_MAC_CMAC_SM4:
+ return GCRY_CIPHER_SM4;
+ }
+}
+
+
+static gcry_err_code_t
+cmac_open (gcry_mac_hd_t h)
+{
+ gcry_err_code_t err;
+ gcry_cipher_hd_t hd;
+ int secure = (h->magic == CTX_MAC_MAGIC_SECURE);
+ int cipher_algo;
+ unsigned int flags;
+
+ cipher_algo = map_mac_algo_to_cipher (h->spec->algo);
+ flags = (secure ? GCRY_CIPHER_SECURE : 0);
+
+ err = _gcry_cipher_open_internal (&hd, cipher_algo, GCRY_CIPHER_MODE_CMAC,
+ flags);
+ if (err)
+ return err;
+
+ h->u.cmac.cipher_algo = cipher_algo;
+ h->u.cmac.ctx = hd;
+ h->u.cmac.blklen = _gcry_cipher_get_algo_blklen (cipher_algo);
+ return 0;
+}
+
+
+static void
+cmac_close (gcry_mac_hd_t h)
+{
+ _gcry_cipher_close (h->u.cmac.ctx);
+ h->u.cmac.ctx = NULL;
+}
+
+
+static gcry_err_code_t
+cmac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen)
+{
+ return _gcry_cipher_setkey (h->u.cmac.ctx, key, keylen);
+}
+
+
+static gcry_err_code_t
+cmac_reset (gcry_mac_hd_t h)
+{
+ return _gcry_cipher_reset (h->u.cmac.ctx);
+}
+
+
+static gcry_err_code_t
+cmac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+ return _gcry_cipher_cmac_authenticate (h->u.cmac.ctx, buf, buflen);
+}
+
+
+static gcry_err_code_t
+cmac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen)
+{
+ if (*outlen > h->u.cmac.blklen)
+ *outlen = h->u.cmac.blklen;
+ return _gcry_cipher_cmac_get_tag (h->u.cmac.ctx, outbuf, *outlen);
+}
+
+
+static gcry_err_code_t
+cmac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+ return _gcry_cipher_cmac_check_tag (h->u.cmac.ctx, buf, buflen);
+}
+
+
+static unsigned int
+cmac_get_maclen (int algo)
+{
+ return _gcry_cipher_get_algo_blklen (map_mac_algo_to_cipher (algo));
+}
+
+
+static unsigned int
+cmac_get_keylen (int algo)
+{
+ return _gcry_cipher_get_algo_keylen (map_mac_algo_to_cipher (algo));
+}
+
+
+/* Check one CMAC with MAC ALGO using the regular MAC
+ * API. (DATA,DATALEN) is the data to be MACed, (KEY,KEYLEN) the key
+ * and (EXPECT,EXPECTLEN) the expected result. Returns NULL on
+ * success or a string describing the failure. */
+static const char *
+check_one (int algo, const char *data, size_t datalen,
+ const char *key, size_t keylen,
+ const char *expect, size_t expectlen)
+{
+ gcry_mac_hd_t hd;
+ unsigned char mac[512]; /* hardcoded to avoid allocation */
+ unsigned int maclen;
+ size_t macoutlen;
+ int i;
+ gcry_error_t err = 0;
+
+ err = _gcry_mac_open (&hd, algo, 0, NULL);
+ if (err)
+ return "gcry_mac_open failed";
+
+ i = _gcry_mac_get_algo (hd);
+ if (i != algo)
+ return "gcry_mac_get_algo failed";
+
+ maclen = _gcry_mac_get_algo_maclen (algo);
+ if (maclen < 1 || maclen > 500)
+ return "gcry_mac_get_algo_maclen failed";
+
+ if (maclen != expectlen)
+ return "invalid tests data";
+
+ err = _gcry_mac_setkey (hd, key, keylen);
+ if (err)
+ {
+ _gcry_mac_close (hd);
+ return "gcry_mac_setkey failed";
+ }
+
+ err = _gcry_mac_write (hd, data, datalen);
+ if (err)
+ {
+ _gcry_mac_close (hd);
+ return "gcry_mac_write failed";
+ }
+
+ err = _gcry_mac_verify (hd, expect, maclen);
+ if (err)
+ {
+ _gcry_mac_close (hd);
+ return "gcry_mac_verify failed";
+ }
+
+ macoutlen = maclen;
+ err = _gcry_mac_read (hd, mac, &macoutlen);
+ _gcry_mac_close (hd);
+ if (err)
+ return "gcry_mac_read failed";
+
+ if (memcmp (mac, expect, maclen))
+ return "does not match";
+
+ return NULL;
+}
+
+
+/*
+ * CMAC AES and DES test vectors are from
+ * http://web.archive.org/web/20130930212819/http://csrc.nist.gov/publica \
+ * tions/nistpubs/800-38B/Updated_CMAC_Examples.pdf
+ */
+
+static gpg_err_code_t
+selftests_cmac_3des (int extended, selftest_report_func_t report)
+{
+ static const struct
+ {
+ const char *desc;
+ const char *data;
+ const char *key;
+ const char *expect;
+ } tv[] =
+ {
+ { "Basic 3DES",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57",
+ "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
+ "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
+ "\x74\x3d\xdb\xe0\xce\x2d\xc2\xed" },
+ { "Extended 3DES #1",
+ "",
+ "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
+ "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
+ "\xb7\xa6\x88\xe1\x22\xff\xaf\x95" },
+ { "Extended 3DES #2",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96",
+ "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
+ "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
+ "\x8e\x8f\x29\x31\x36\x28\x37\x97" },
+ { "Extended 3DES #3",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51",
+ "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
+ "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
+ "\x33\xe6\xb1\x09\x24\x00\xea\xe5" },
+ { "Extended 3DES #4",
+ "",
+ "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5\x8a\x3d\x10\xba\x80\x57\x0d\x38"
+ "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5",
+ "\xbd\x2e\xbf\x9a\x3b\xa0\x03\x61" },
+ { "Extended 3DES #5",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96",
+ "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5\x8a\x3d\x10\xba\x80\x57\x0d\x38"
+ "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5",
+ "\x4f\xf2\xab\x81\x3c\x53\xce\x83" },
+ { "Extended 3DES #6",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57",
+ "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5\x8a\x3d\x10\xba\x80\x57\x0d\x38"
+ "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5",
+ "\x62\xdd\x1b\x47\x19\x02\xbd\x4e" },
+ { "Extended 3DES #7",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51",
+ "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5\x8a\x3d\x10\xba\x80\x57\x0d\x38"
+ "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5",
+ "\x31\xb1\xe4\x31\xda\xbc\x4e\xb8" },
+ { NULL }
+ };
+ const char *what;
+ const char *errtxt;
+ int tvidx;
+
+ for (tvidx=0; tv[tvidx].desc; tvidx++)
+ {
+ what = tv[tvidx].desc;
+ errtxt = check_one (GCRY_MAC_CMAC_3DES,
+ tv[tvidx].data, strlen (tv[tvidx].data),
+ tv[tvidx].key, strlen (tv[tvidx].key),
+ tv[tvidx].expect, 8);
+ if (errtxt)
+ goto failed;
+ if (!extended)
+ break;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("cmac", GCRY_MAC_CMAC_3DES, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+
+static gpg_err_code_t
+selftests_cmac_aes (int extended, selftest_report_func_t report)
+{
+ static const struct
+ {
+ const char *desc;
+ const char *data;
+ const char *key;
+ const char *expect;
+ } tv[] =
+ {
+ { "Basic AES128",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11",
+ "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+ "\xdf\xa6\x67\x47\xde\x9a\xe6\x30\x30\xca\x32\x61\x14\x97\xc8\x27" },
+ { "Basic AES192",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11",
+ "\x8e\x73\xb0\xf7\xda\x0e\x64\x52\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+ "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+ "\x8a\x1d\xe5\xbe\x2e\xb3\x1a\xad\x08\x9a\x82\xe6\xee\x90\x8b\x0e" },
+ { "Basic AES256",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11",
+ "\x60\x3d\xeb\x10\x15\xca\x71\xbe\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+ "\x1f\x35\x2c\x07\x3b\x61\x08\xd7\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+ "\xaa\xf3\xd8\xf1\xde\x56\x40\xc2\x32\xf5\xb1\x69\xb9\xc9\x11\xe6" },
+ { "Extended AES #1",
+ "",
+ "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+ "\xbb\x1d\x69\x29\xe9\x59\x37\x28\x7f\xa3\x7d\x12\x9b\x75\x67\x46" },
+ { "Extended AES #2",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a",
+ "\x8e\x73\xb0\xf7\xda\x0e\x64\x52\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+ "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+ "\x9e\x99\xa7\xbf\x31\xe7\x10\x90\x06\x62\xf6\x5e\x61\x7c\x51\x84" },
+ { "Extended AES #3",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+ "\x60\x3d\xeb\x10\x15\xca\x71\xbe\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+ "\x1f\x35\x2c\x07\x3b\x61\x08\xd7\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+ "\xe1\x99\x21\x90\x54\x9f\x6e\xd5\x69\x6a\x2c\x05\x6c\x31\x54\x10" },
+ { "Extended AES #4",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a",
+ "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+ "\x07\x0a\x16\xb4\x6b\x4d\x41\x44\xf7\x9b\xdd\x9d\xd0\x4a\x28\x7c" },
+ { "Extended AES #5",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+ "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+ "\x51\xf0\xbe\xbf\x7e\x3b\x9d\x92\xfc\x49\x74\x17\x79\x36\x3c\xfe" },
+ { "Extended AES #6",
+ "",
+ "\x8e\x73\xb0\xf7\xda\x0e\x64\x52\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+ "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+ "\xd1\x7d\xdf\x46\xad\xaa\xcd\xe5\x31\xca\xc4\x83\xde\x7a\x93\x67" },
+ { "Extended AES #7",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+ "\x8e\x73\xb0\xf7\xda\x0e\x64\x52\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+ "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+ "\xa1\xd5\xdf\x0e\xed\x79\x0f\x79\x4d\x77\x58\x96\x59\xf3\x9a\x11" },
+ { "Extended AES #8",
+ "",
+ "\x60\x3d\xeb\x10\x15\xca\x71\xbe\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+ "\x1f\x35\x2c\x07\x3b\x61\x08\xd7\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+ "\x02\x89\x62\xf6\x1b\x7b\xf8\x9e\xfc\x6b\x55\x1f\x46\x67\xd9\x83" },
+ { "Extended AES #9",
+ "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a",
+ "\x60\x3d\xeb\x10\x15\xca\x71\xbe\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+ "\x1f\x35\x2c\x07\x3b\x61\x08\xd7\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+ "\x28\xa7\x02\x3f\x45\x2e\x8f\x82\xbd\x4b\xf2\x8d\x8c\x37\xc3\x5c" },
+ { NULL }
+ };
+ const char *what;
+ const char *errtxt;
+ int tvidx;
+
+ for (tvidx=0; tv[tvidx].desc; tvidx++)
+ {
+ what = tv[tvidx].desc;
+ errtxt = check_one (GCRY_MAC_CMAC_AES,
+ tv[tvidx].data, strlen (tv[tvidx].data),
+ tv[tvidx].key, strlen (tv[tvidx].key),
+ tv[tvidx].expect, strlen (tv[tvidx].expect));
+ if (errtxt)
+ goto failed;
+ if (tvidx >= 2 && !extended)
+ break;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("cmac", GCRY_MAC_CMAC_AES, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+static gpg_err_code_t
+cmac_selftest (int algo, int extended, selftest_report_func_t report)
+{
+ gpg_err_code_t ec;
+
+ switch (algo)
+ {
+ case GCRY_MAC_CMAC_3DES:
+ ec = selftests_cmac_3des (extended, report);
+ break;
+ case GCRY_MAC_CMAC_AES:
+ ec = selftests_cmac_aes (extended, report);
+ break;
+
+ default:
+ ec = GPG_ERR_MAC_ALGO;
+ break;
+ }
+
+ return ec;
+}
+
+
+static gcry_mac_spec_ops_t cmac_ops = {
+ cmac_open,
+ cmac_close,
+ cmac_setkey,
+ NULL,
+ cmac_reset,
+ cmac_write,
+ cmac_read,
+ cmac_verify,
+ cmac_get_maclen,
+ cmac_get_keylen,
+ NULL,
+ cmac_selftest
+};
+
+
+#if USE_BLOWFISH
+gcry_mac_spec_t _gcry_mac_type_spec_cmac_blowfish = {
+ GCRY_MAC_CMAC_BLOWFISH, {0, 0}, "CMAC_BLOWFISH",
+ &cmac_ops
+};
+#endif
+#if USE_DES
+gcry_mac_spec_t _gcry_mac_type_spec_cmac_tripledes = {
+ GCRY_MAC_CMAC_3DES, {0, 1}, "CMAC_3DES",
+ &cmac_ops
+};
+#endif
+#if USE_CAST5
+gcry_mac_spec_t _gcry_mac_type_spec_cmac_cast5 = {
+ GCRY_MAC_CMAC_CAST5, {0, 0}, "CMAC_CAST5",
+ &cmac_ops
+};
+#endif
+#if USE_AES
+gcry_mac_spec_t _gcry_mac_type_spec_cmac_aes = {
+ GCRY_MAC_CMAC_AES, {0, 1}, "CMAC_AES",
+ &cmac_ops
+};
+#endif
+#if USE_TWOFISH
+gcry_mac_spec_t _gcry_mac_type_spec_cmac_twofish = {
+ GCRY_MAC_CMAC_TWOFISH, {0, 0}, "CMAC_TWOFISH",
+ &cmac_ops
+};
+#endif
+#if USE_SERPENT
+gcry_mac_spec_t _gcry_mac_type_spec_cmac_serpent = {
+ GCRY_MAC_CMAC_SERPENT, {0, 0}, "CMAC_SERPENT",
+ &cmac_ops
+};
+#endif
+#if USE_RFC2268
+gcry_mac_spec_t _gcry_mac_type_spec_cmac_rfc2268 = {
+ GCRY_MAC_CMAC_RFC2268, {0, 0}, "CMAC_RFC2268",
+ &cmac_ops
+};
+#endif
+#if USE_SEED
+gcry_mac_spec_t _gcry_mac_type_spec_cmac_seed = {
+ GCRY_MAC_CMAC_SEED, {0, 0}, "CMAC_SEED",
+ &cmac_ops
+};
+#endif
+#if USE_CAMELLIA
+gcry_mac_spec_t _gcry_mac_type_spec_cmac_camellia = {
+ GCRY_MAC_CMAC_CAMELLIA, {0, 0}, "CMAC_CAMELLIA",
+ &cmac_ops
+};
+#endif
+#ifdef USE_IDEA
+gcry_mac_spec_t _gcry_mac_type_spec_cmac_idea = {
+ GCRY_MAC_CMAC_IDEA, {0, 0}, "CMAC_IDEA",
+ &cmac_ops
+};
+#endif
+#if USE_GOST28147
+gcry_mac_spec_t _gcry_mac_type_spec_cmac_gost28147 = {
+ GCRY_MAC_CMAC_GOST28147, {0, 0}, "CMAC_GOST28147",
+ &cmac_ops
+};
+#endif
+#if USE_SM4
+gcry_mac_spec_t _gcry_mac_type_spec_cmac_sm4 = {
+ GCRY_MAC_CMAC_SM4, {0, 0}, "CMAC_SM4",
+ &cmac_ops
+};
+#endif
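This glue sits behind libgcrypt's public gcry_mac API. A minimal usage sketch for CMAC-AES follows; the function name cmac_example is illustrative and error handling is abbreviated:

#include <gcrypt.h>

static void
cmac_example (const void *key, size_t keylen,
              const void *data, size_t datalen)
{
  gcry_mac_hd_t hd;
  unsigned char tag[16];              /* AES block size */
  size_t taglen = sizeof tag;

  gcry_mac_open (&hd, GCRY_MAC_CMAC_AES, 0, NULL);
  gcry_mac_setkey (hd, key, keylen);
  gcry_mac_write (hd, data, datalen);
  gcry_mac_read (hd, tag, &taglen);   /* cmac_read clamps taglen to the block length */
  gcry_mac_close (hd);
}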
diff --git a/comm/third_party/libgcrypt/cipher/mac-gmac.c b/comm/third_party/libgcrypt/cipher/mac-gmac.c
new file mode 100644
index 0000000000..e04c6d1ef0
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/mac-gmac.c
@@ -0,0 +1,187 @@
+/* mac-gmac.c - GMAC glue for MAC API
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "./mac-internal.h"
+
+
+static int
+map_mac_algo_to_cipher (int mac_algo)
+{
+ switch (mac_algo)
+ {
+ default:
+ return GCRY_CIPHER_NONE;
+ case GCRY_MAC_GMAC_AES:
+ return GCRY_CIPHER_AES;
+ case GCRY_MAC_GMAC_CAMELLIA:
+ return GCRY_CIPHER_CAMELLIA128;
+ case GCRY_MAC_GMAC_TWOFISH:
+ return GCRY_CIPHER_TWOFISH;
+ case GCRY_MAC_GMAC_SERPENT:
+ return GCRY_CIPHER_SERPENT128;
+ case GCRY_MAC_GMAC_SEED:
+ return GCRY_CIPHER_SEED;
+ }
+}
+
+
+static gcry_err_code_t
+gmac_open (gcry_mac_hd_t h)
+{
+ gcry_err_code_t err;
+ gcry_cipher_hd_t hd;
+ int secure = (h->magic == CTX_MAC_MAGIC_SECURE);
+ int cipher_algo;
+ unsigned int flags;
+
+ cipher_algo = map_mac_algo_to_cipher (h->spec->algo);
+ flags = (secure ? GCRY_CIPHER_SECURE : 0);
+
+ err = _gcry_cipher_open_internal (&hd, cipher_algo, GCRY_CIPHER_MODE_GCM,
+ flags);
+ if (err)
+ return err;
+
+ h->u.gmac.cipher_algo = cipher_algo;
+ h->u.gmac.ctx = hd;
+ return 0;
+}
+
+
+static void
+gmac_close (gcry_mac_hd_t h)
+{
+ _gcry_cipher_close (h->u.gmac.ctx);
+ h->u.gmac.ctx = NULL;
+}
+
+
+static gcry_err_code_t
+gmac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen)
+{
+ return _gcry_cipher_setkey (h->u.gmac.ctx, key, keylen);
+}
+
+
+static gcry_err_code_t
+gmac_setiv (gcry_mac_hd_t h, const unsigned char *iv, size_t ivlen)
+{
+ return _gcry_cipher_setiv (h->u.gmac.ctx, iv, ivlen);
+}
+
+
+static gcry_err_code_t
+gmac_reset (gcry_mac_hd_t h)
+{
+ return _gcry_cipher_reset (h->u.gmac.ctx);
+}
+
+
+static gcry_err_code_t
+gmac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+ return _gcry_cipher_authenticate (h->u.gmac.ctx, buf, buflen);
+}
+
+
+static gcry_err_code_t
+gmac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen)
+{
+ if (*outlen > GCRY_GCM_BLOCK_LEN)
+ *outlen = GCRY_GCM_BLOCK_LEN;
+ return _gcry_cipher_gettag (h->u.gmac.ctx, outbuf, *outlen);
+}
+
+
+static gcry_err_code_t
+gmac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+ return _gcry_cipher_checktag (h->u.gmac.ctx, buf, buflen);
+}
+
+
+static unsigned int
+gmac_get_maclen (int algo)
+{
+ (void)algo;
+ return GCRY_GCM_BLOCK_LEN;
+}
+
+
+static unsigned int
+gmac_get_keylen (int algo)
+{
+ return _gcry_cipher_get_algo_keylen (map_mac_algo_to_cipher (algo));
+}
+
+
+static gcry_mac_spec_ops_t gmac_ops = {
+ gmac_open,
+ gmac_close,
+ gmac_setkey,
+ gmac_setiv,
+ gmac_reset,
+ gmac_write,
+ gmac_read,
+ gmac_verify,
+ gmac_get_maclen,
+ gmac_get_keylen,
+ NULL,
+ NULL
+};
+
+
+#if USE_AES
+gcry_mac_spec_t _gcry_mac_type_spec_gmac_aes = {
+ GCRY_MAC_GMAC_AES, {0, 1}, "GMAC_AES",
+ &gmac_ops
+};
+#endif
+#if USE_TWOFISH
+gcry_mac_spec_t _gcry_mac_type_spec_gmac_twofish = {
+ GCRY_MAC_GMAC_TWOFISH, {0, 0}, "GMAC_TWOFISH",
+ &gmac_ops
+};
+#endif
+#if USE_SERPENT
+gcry_mac_spec_t _gcry_mac_type_spec_gmac_serpent = {
+ GCRY_MAC_GMAC_SERPENT, {0, 0}, "GMAC_SERPENT",
+ &gmac_ops
+};
+#endif
+#if USE_SEED
+gcry_mac_spec_t _gcry_mac_type_spec_gmac_seed = {
+ GCRY_MAC_GMAC_SEED, {0, 0}, "GMAC_SEED",
+ &gmac_ops
+};
+#endif
+#if USE_CAMELLIA
+gcry_mac_spec_t _gcry_mac_type_spec_gmac_camellia = {
+ GCRY_MAC_GMAC_CAMELLIA, {0, 0}, "GMAC_CAMELLIA",
+ &gmac_ops
+};
+#endif
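Unlike CMAC, GMAC additionally needs a nonce, supplied through the setiv operation wired up above before any data is written. A minimal usage sketch for GMAC-AES through the public API; the function name gmac_example is illustrative and error handling is abbreviated:

#include <gcrypt.h>

static void
gmac_example (const void *key, size_t keylen,
              const unsigned char nonce[12],
              const void *data, size_t datalen)
{
  gcry_mac_hd_t hd;
  unsigned char tag[GCRY_GCM_BLOCK_LEN];   /* 16-byte tag */
  size_t taglen = sizeof tag;

  gcry_mac_open (&hd, GCRY_MAC_GMAC_AES, 0, NULL);
  gcry_mac_setkey (hd, key, keylen);
  gcry_mac_setiv (hd, nonce, 12);          /* GCM nonce, typically 96 bits */
  gcry_mac_write (hd, data, datalen);
  gcry_mac_read (hd, tag, &taglen);
  gcry_mac_close (hd);
}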
diff --git a/comm/third_party/libgcrypt/cipher/mac-hmac.c b/comm/third_party/libgcrypt/cipher/mac-hmac.c
new file mode 100644
index 0000000000..4e10dd2c9e
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/mac-hmac.c
@@ -0,0 +1,1495 @@
+/* mac-hmac.c - HMAC glue for MAC API
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "./mac-internal.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hmac256.h"
+
+
+static int
+map_mac_algo_to_md (int mac_algo)
+{
+ switch (mac_algo)
+ {
+ default:
+ return GCRY_MD_NONE;
+ case GCRY_MAC_HMAC_MD2:
+ return GCRY_MD_MD2;
+ case GCRY_MAC_HMAC_MD4:
+ return GCRY_MD_MD4;
+ case GCRY_MAC_HMAC_MD5:
+ return GCRY_MD_MD5;
+ case GCRY_MAC_HMAC_SHA1:
+ return GCRY_MD_SHA1;
+ case GCRY_MAC_HMAC_SHA224:
+ return GCRY_MD_SHA224;
+ case GCRY_MAC_HMAC_SHA256:
+ return GCRY_MD_SHA256;
+ case GCRY_MAC_HMAC_SHA384:
+ return GCRY_MD_SHA384;
+ case GCRY_MAC_HMAC_SHA512:
+ return GCRY_MD_SHA512;
+ case GCRY_MAC_HMAC_SHA512_256:
+ return GCRY_MD_SHA512_256;
+ case GCRY_MAC_HMAC_SHA512_224:
+ return GCRY_MD_SHA512_224;
+ case GCRY_MAC_HMAC_SHA3_224:
+ return GCRY_MD_SHA3_224;
+ case GCRY_MAC_HMAC_SHA3_256:
+ return GCRY_MD_SHA3_256;
+ case GCRY_MAC_HMAC_SHA3_384:
+ return GCRY_MD_SHA3_384;
+ case GCRY_MAC_HMAC_SHA3_512:
+ return GCRY_MD_SHA3_512;
+ case GCRY_MAC_HMAC_RMD160:
+ return GCRY_MD_RMD160;
+ case GCRY_MAC_HMAC_TIGER1:
+ return GCRY_MD_TIGER1;
+ case GCRY_MAC_HMAC_WHIRLPOOL:
+ return GCRY_MD_WHIRLPOOL;
+ case GCRY_MAC_HMAC_GOSTR3411_94:
+ return GCRY_MD_GOSTR3411_94;
+ case GCRY_MAC_HMAC_GOSTR3411_CP:
+ return GCRY_MD_GOSTR3411_CP;
+ case GCRY_MAC_HMAC_STRIBOG256:
+ return GCRY_MD_STRIBOG256;
+ case GCRY_MAC_HMAC_STRIBOG512:
+ return GCRY_MD_STRIBOG512;
+ case GCRY_MAC_HMAC_BLAKE2B_512:
+ return GCRY_MD_BLAKE2B_512;
+ case GCRY_MAC_HMAC_BLAKE2B_384:
+ return GCRY_MD_BLAKE2B_384;
+ case GCRY_MAC_HMAC_BLAKE2B_256:
+ return GCRY_MD_BLAKE2B_256;
+ case GCRY_MAC_HMAC_BLAKE2B_160:
+ return GCRY_MD_BLAKE2B_160;
+ case GCRY_MAC_HMAC_BLAKE2S_256:
+ return GCRY_MD_BLAKE2S_256;
+ case GCRY_MAC_HMAC_BLAKE2S_224:
+ return GCRY_MD_BLAKE2S_224;
+ case GCRY_MAC_HMAC_BLAKE2S_160:
+ return GCRY_MD_BLAKE2S_160;
+ case GCRY_MAC_HMAC_BLAKE2S_128:
+ return GCRY_MD_BLAKE2S_128;
+ case GCRY_MAC_HMAC_SM3:
+ return GCRY_MD_SM3;
+ }
+}
+
+
+static gcry_err_code_t
+hmac_open (gcry_mac_hd_t h)
+{
+ gcry_err_code_t err;
+ gcry_md_hd_t hd;
+ int secure = (h->magic == CTX_MAC_MAGIC_SECURE);
+ unsigned int flags;
+ int md_algo;
+
+ md_algo = map_mac_algo_to_md (h->spec->algo);
+
+ flags = GCRY_MD_FLAG_HMAC;
+ flags |= (secure ? GCRY_MD_FLAG_SECURE : 0);
+
+ err = _gcry_md_open (&hd, md_algo, flags);
+ if (err)
+ return err;
+
+ h->u.hmac.md_algo = md_algo;
+ h->u.hmac.md_ctx = hd;
+ return 0;
+}
+
+
+static void
+hmac_close (gcry_mac_hd_t h)
+{
+ _gcry_md_close (h->u.hmac.md_ctx);
+ h->u.hmac.md_ctx = NULL;
+}
+
+
+static gcry_err_code_t
+hmac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen)
+{
+ return _gcry_md_setkey (h->u.hmac.md_ctx, key, keylen);
+}
+
+
+static gcry_err_code_t
+hmac_reset (gcry_mac_hd_t h)
+{
+ _gcry_md_reset (h->u.hmac.md_ctx);
+ return 0;
+}
+
+
+static gcry_err_code_t
+hmac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+ _gcry_md_write (h->u.hmac.md_ctx, buf, buflen);
+ return 0;
+}
+
+
+static gcry_err_code_t
+hmac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen)
+{
+ unsigned int dlen;
+ const unsigned char *digest;
+
+ dlen = _gcry_md_get_algo_dlen (h->u.hmac.md_algo);
+ digest = _gcry_md_read (h->u.hmac.md_ctx, h->u.hmac.md_algo);
+
+ if (*outlen <= dlen)
+ buf_cpy (outbuf, digest, *outlen);
+ else
+ {
+ buf_cpy (outbuf, digest, dlen);
+ *outlen = dlen;
+ }
+
+ return 0;
+}
+
+
+static gcry_err_code_t
+hmac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+ unsigned int dlen;
+ const unsigned char *digest;
+
+ dlen = _gcry_md_get_algo_dlen (h->u.hmac.md_algo);
+ digest = _gcry_md_read (h->u.hmac.md_ctx, h->u.hmac.md_algo);
+
+ if (buflen > dlen)
+ return GPG_ERR_INV_LENGTH;
+
+ return buf_eq_const (buf, digest, buflen) ? 0 : GPG_ERR_CHECKSUM;
+}
+
+
+static unsigned int
+hmac_get_maclen (int algo)
+{
+ return _gcry_md_get_algo_dlen (map_mac_algo_to_md (algo));
+}
+
+
+static unsigned int
+hmac_get_keylen (int algo)
+{
+ /* Return blocksize for default key length. */
+ switch (algo)
+ {
+ case GCRY_MD_SHA3_224:
+ return 1152 / 8;
+ case GCRY_MD_SHA3_256:
+ return 1088 / 8;
+ case GCRY_MD_SHA3_384:
+ return 832 / 8;
+ case GCRY_MD_SHA3_512:
+ return 576 / 8;
+ case GCRY_MAC_HMAC_SHA384:
+ case GCRY_MAC_HMAC_SHA512:
+ return 128;
+ case GCRY_MAC_HMAC_GOSTR3411_94:
+ return 32;
+ default:
+ return 64;
+ }
+}
+
+
+/* Check one HMAC with digest ALGO using the regular HMAC
+ * API. (DATA,DATALEN) is the data to be MACed, (KEY,KEYLEN) the key
+ * and (EXPECT,EXPECTLEN) the expected result. If TRUNC is set, the
+ * EXPECTLEN may be less than the digest length. Returns NULL on
+ * success or a string describing the failure. */
+static const char *
+check_one (int algo,
+ const void *data, size_t datalen,
+ const void *key, size_t keylen,
+ const void *expect, size_t expectlen, int trunc)
+{
+ gcry_md_hd_t hd;
+ const unsigned char *digest;
+
+/* printf ("HMAC algo %d\n", algo); */
+ if (trunc)
+ {
+ if (_gcry_md_get_algo_dlen (algo) < expectlen)
+ return "invalid tests data";
+ }
+ else
+ {
+ if (_gcry_md_get_algo_dlen (algo) != expectlen)
+ return "invalid tests data";
+ }
+ if (_gcry_md_open (&hd, algo, GCRY_MD_FLAG_HMAC))
+ return "gcry_md_open failed";
+ if (_gcry_md_setkey (hd, key, keylen))
+ {
+ _gcry_md_close (hd);
+ return "gcry_md_setkey failed";
+ }
+ _gcry_md_write (hd, data, datalen);
+ digest = _gcry_md_read (hd, algo);
+ if (!digest)
+ {
+ _gcry_md_close (hd);
+ return "gcry_md_read failed";
+ }
+ if (memcmp (digest, expect, expectlen))
+ {
+/* int i; */
+
+/* fputs (" {", stdout); */
+/* for (i=0; i < expectlen-1; i++) */
+/* { */
+/* if (i && !(i % 8)) */
+/* fputs ("\n ", stdout); */
+/* printf (" 0x%02x,", digest[i]); */
+/* } */
+/* printf (" 0x%02x } },\n", digest[i]); */
+
+ _gcry_md_close (hd);
+ return "does not match";
+ }
+ _gcry_md_close (hd);
+ return NULL;
+}
+
+
+static gpg_err_code_t
+selftests_sha1 (int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+ unsigned char key[128];
+ int i, j;
+
+ what = "FIPS-198a, A.1";
+ for (i=0; i < 64; i++)
+ key[i] = i;
+ errtxt = check_one (GCRY_MD_SHA1,
+ "Sample #1", 9,
+ key, 64,
+ "\x4f\x4c\xa3\xd5\xd6\x8b\xa7\xcc\x0a\x12"
+ "\x08\xc9\xc6\x1e\x9c\x5d\xa0\x40\x3c\x0a", 20, 0);
+ if (errtxt)
+ goto failed;
+
+ if (extended)
+ {
+ what = "FIPS-198a, A.2";
+ for (i=0, j=0x30; i < 20; i++)
+ key[i] = j++;
+ errtxt = check_one (GCRY_MD_SHA1,
+ "Sample #2", 9,
+ key, 20,
+ "\x09\x22\xd3\x40\x5f\xaa\x3d\x19\x4f\x82"
+ "\xa4\x58\x30\x73\x7d\x5c\xc6\xc7\x5d\x24", 20, 0);
+ if (errtxt)
+ goto failed;
+
+ what = "FIPS-198a, A.3";
+ for (i=0, j=0x50; i < 100; i++)
+ key[i] = j++;
+ errtxt = check_one (GCRY_MD_SHA1,
+ "Sample #3", 9,
+ key, 100,
+ "\xbc\xf4\x1e\xab\x8b\xb2\xd8\x02\xf3\xd0"
+ "\x5c\xaf\x7c\xb0\x92\xec\xf8\xd1\xa3\xaa", 20, 0);
+ if (errtxt)
+ goto failed;
+
+ what = "FIPS-198a, A.4";
+ for (i=0, j=0x70; i < 49; i++)
+ key[i] = j++;
+ errtxt = check_one (GCRY_MD_SHA1,
+ "Sample #4", 9,
+ key, 49,
+ "\x9e\xa8\x86\xef\xe2\x68\xdb\xec\xce\x42"
+ "\x0c\x75\x24\xdf\x32\xe0\x75\x1a\x2a\x26", 20, 0);
+ if (errtxt)
+ goto failed;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("hmac", GCRY_MD_SHA1, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+
+static gpg_err_code_t
+selftests_sha224 (int extended, selftest_report_func_t report)
+{
+ static struct
+ {
+ const char * const desc;
+ const char * const data;
+ const char * const key;
+ const char expect[28];
+ } tv[] =
+ {
+ { "data-28 key-4",
+ "what do ya want for nothing?",
+ "Jefe",
+ { 0xa3, 0x0e, 0x01, 0x09, 0x8b, 0xc6, 0xdb, 0xbf,
+ 0x45, 0x69, 0x0f, 0x3a, 0x7e, 0x9e, 0x6d, 0x0f,
+ 0x8b, 0xbe, 0xa2, 0xa3, 0x9e, 0x61, 0x48, 0x00,
+ 0x8f, 0xd0, 0x5e, 0x44 } },
+
+ { "data-9 key-20",
+ "Hi There",
+ "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
+ "\x0b\x0b\x0b\x0b",
+ { 0x89, 0x6f, 0xb1, 0x12, 0x8a, 0xbb, 0xdf, 0x19,
+ 0x68, 0x32, 0x10, 0x7c, 0xd4, 0x9d, 0xf3, 0x3f,
+ 0x47, 0xb4, 0xb1, 0x16, 0x99, 0x12, 0xba, 0x4f,
+ 0x53, 0x68, 0x4b, 0x22 } },
+
+ { "data-50 key-20",
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa",
+ { 0x7f, 0xb3, 0xcb, 0x35, 0x88, 0xc6, 0xc1, 0xf6,
+ 0xff, 0xa9, 0x69, 0x4d, 0x7d, 0x6a, 0xd2, 0x64,
+ 0x93, 0x65, 0xb0, 0xc1, 0xf6, 0x5d, 0x69, 0xd1,
+ 0xec, 0x83, 0x33, 0xea } },
+
+ { "data-50 key-26",
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd",
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+ "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
+ { 0x6c, 0x11, 0x50, 0x68, 0x74, 0x01, 0x3c, 0xac,
+ 0x6a, 0x2a, 0xbc, 0x1b, 0xb3, 0x82, 0x62, 0x7c,
+ 0xec, 0x6a, 0x90, 0xd8, 0x6e, 0xfc, 0x01, 0x2d,
+ 0xe7, 0xaf, 0xec, 0x5a } },
+
+ { "data-54 key-131",
+ "Test Using Larger Than Block-Size Key - Hash Key First",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa",
+ { 0x95, 0xe9, 0xa0, 0xdb, 0x96, 0x20, 0x95, 0xad,
+ 0xae, 0xbe, 0x9b, 0x2d, 0x6f, 0x0d, 0xbc, 0xe2,
+ 0xd4, 0x99, 0xf1, 0x12, 0xf2, 0xd2, 0xb7, 0x27,
+ 0x3f, 0xa6, 0x87, 0x0e } },
+
+ { "data-152 key-131",
+ "This is a test using a larger than block-size key and a larger "
+ "than block-size data. The key needs to be hashed before being "
+ "used by the HMAC algorithm.",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa",
+ { 0x3a, 0x85, 0x41, 0x66, 0xac, 0x5d, 0x9f, 0x02,
+ 0x3f, 0x54, 0xd5, 0x17, 0xd0, 0xb3, 0x9d, 0xbd,
+ 0x94, 0x67, 0x70, 0xdb, 0x9c, 0x2b, 0x95, 0xc9,
+ 0xf6, 0xf5, 0x65, 0xd1 } },
+
+ { NULL }
+ };
+ const char *what;
+ const char *errtxt;
+ int tvidx;
+
+ for (tvidx=0; tv[tvidx].desc; tvidx++)
+ {
+ what = tv[tvidx].desc;
+ errtxt = check_one (GCRY_MD_SHA224,
+ tv[tvidx].data, strlen (tv[tvidx].data),
+ tv[tvidx].key, strlen (tv[tvidx].key),
+ tv[tvidx].expect, DIM (tv[tvidx].expect), 0);
+ if (errtxt)
+ goto failed;
+ if (!extended)
+ break;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("hmac", GCRY_MD_SHA224, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+static gpg_err_code_t
+selftests_sha256 (int extended, selftest_report_func_t report)
+{
+ static struct
+ {
+ const char * const desc;
+ const char * const data;
+ const char * const key;
+ const char expect[32];
+ } tv[] =
+ {
+ { "data-28 key-4",
+ "what do ya want for nothing?",
+ "Jefe",
+ { 0x5b, 0xdc, 0xc1, 0x46, 0xbf, 0x60, 0x75, 0x4e,
+ 0x6a, 0x04, 0x24, 0x26, 0x08, 0x95, 0x75, 0xc7,
+ 0x5a, 0x00, 0x3f, 0x08, 0x9d, 0x27, 0x39, 0x83,
+ 0x9d, 0xec, 0x58, 0xb9, 0x64, 0xec, 0x38, 0x43 } },
+
+ { "data-9 key-20",
+ "Hi There",
+ "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
+ "\x0b\x0b\x0b\x0b",
+ { 0xb0, 0x34, 0x4c, 0x61, 0xd8, 0xdb, 0x38, 0x53,
+ 0x5c, 0xa8, 0xaf, 0xce, 0xaf, 0x0b, 0xf1, 0x2b,
+ 0x88, 0x1d, 0xc2, 0x00, 0xc9, 0x83, 0x3d, 0xa7,
+ 0x26, 0xe9, 0x37, 0x6c, 0x2e, 0x32, 0xcf, 0xf7 } },
+
+ { "data-50 key-20",
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa",
+ { 0x77, 0x3e, 0xa9, 0x1e, 0x36, 0x80, 0x0e, 0x46,
+ 0x85, 0x4d, 0xb8, 0xeb, 0xd0, 0x91, 0x81, 0xa7,
+ 0x29, 0x59, 0x09, 0x8b, 0x3e, 0xf8, 0xc1, 0x22,
+ 0xd9, 0x63, 0x55, 0x14, 0xce, 0xd5, 0x65, 0xfe } },
+
+ { "data-50 key-26",
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd",
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+ "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
+ { 0x82, 0x55, 0x8a, 0x38, 0x9a, 0x44, 0x3c, 0x0e,
+ 0xa4, 0xcc, 0x81, 0x98, 0x99, 0xf2, 0x08, 0x3a,
+ 0x85, 0xf0, 0xfa, 0xa3, 0xe5, 0x78, 0xf8, 0x07,
+ 0x7a, 0x2e, 0x3f, 0xf4, 0x67, 0x29, 0x66, 0x5b } },
+
+ { "data-54 key-131",
+ "Test Using Larger Than Block-Size Key - Hash Key First",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa",
+ { 0x60, 0xe4, 0x31, 0x59, 0x1e, 0xe0, 0xb6, 0x7f,
+ 0x0d, 0x8a, 0x26, 0xaa, 0xcb, 0xf5, 0xb7, 0x7f,
+ 0x8e, 0x0b, 0xc6, 0x21, 0x37, 0x28, 0xc5, 0x14,
+ 0x05, 0x46, 0x04, 0x0f, 0x0e, 0xe3, 0x7f, 0x54 } },
+
+ { "data-152 key-131",
+ "This is a test using a larger than block-size key and a larger "
+ "than block-size data. The key needs to be hashed before being "
+ "used by the HMAC algorithm.",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa",
+ { 0x9b, 0x09, 0xff, 0xa7, 0x1b, 0x94, 0x2f, 0xcb,
+ 0x27, 0x63, 0x5f, 0xbc, 0xd5, 0xb0, 0xe9, 0x44,
+ 0xbf, 0xdc, 0x63, 0x64, 0x4f, 0x07, 0x13, 0x93,
+ 0x8a, 0x7f, 0x51, 0x53, 0x5c, 0x3a, 0x35, 0xe2 } },
+
+ { NULL }
+ };
+ const char *what;
+ const char *errtxt;
+ int tvidx;
+
+ for (tvidx=0; tv[tvidx].desc; tvidx++)
+ {
+ hmac256_context_t hmachd;
+ const unsigned char *digest;
+ size_t dlen;
+
+ what = tv[tvidx].desc;
+ errtxt = check_one (GCRY_MD_SHA256,
+ tv[tvidx].data, strlen (tv[tvidx].data),
+ tv[tvidx].key, strlen (tv[tvidx].key),
+ tv[tvidx].expect, DIM (tv[tvidx].expect), 0);
+ if (errtxt)
+ goto failed;
+
+ hmachd = _gcry_hmac256_new (tv[tvidx].key, strlen (tv[tvidx].key));
+ if (!hmachd)
+ {
+ errtxt = "_gcry_hmac256_new failed";
+ goto failed;
+ }
+ _gcry_hmac256_update (hmachd, tv[tvidx].data, strlen (tv[tvidx].data));
+ digest = _gcry_hmac256_finalize (hmachd, &dlen);
+ if (!digest)
+ {
+ errtxt = "_gcry_hmac256_finalize failed";
+ _gcry_hmac256_release (hmachd);
+ goto failed;
+ }
+ if (dlen != DIM (tv[tvidx].expect)
+ || memcmp (digest, tv[tvidx].expect, DIM (tv[tvidx].expect)))
+ {
+ errtxt = "does not match in second implementation";
+ _gcry_hmac256_release (hmachd);
+ goto failed;
+ }
+ _gcry_hmac256_release (hmachd);
+
+ if (!extended)
+ break;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("hmac", GCRY_MD_SHA256, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+static gpg_err_code_t
+selftests_sha384 (int extended, selftest_report_func_t report)
+{
+ static struct
+ {
+ const char * const desc;
+ const char * const data;
+ const char * const key;
+ const char expect[48];
+ } tv[] =
+ {
+ { "data-28 key-4",
+ "what do ya want for nothing?",
+ "Jefe",
+ { 0xaf, 0x45, 0xd2, 0xe3, 0x76, 0x48, 0x40, 0x31,
+ 0x61, 0x7f, 0x78, 0xd2, 0xb5, 0x8a, 0x6b, 0x1b,
+ 0x9c, 0x7e, 0xf4, 0x64, 0xf5, 0xa0, 0x1b, 0x47,
+ 0xe4, 0x2e, 0xc3, 0x73, 0x63, 0x22, 0x44, 0x5e,
+ 0x8e, 0x22, 0x40, 0xca, 0x5e, 0x69, 0xe2, 0xc7,
+ 0x8b, 0x32, 0x39, 0xec, 0xfa, 0xb2, 0x16, 0x49 } },
+
+ { "data-9 key-20",
+ "Hi There",
+ "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
+ "\x0b\x0b\x0b\x0b",
+ { 0xaf, 0xd0, 0x39, 0x44, 0xd8, 0x48, 0x95, 0x62,
+ 0x6b, 0x08, 0x25, 0xf4, 0xab, 0x46, 0x90, 0x7f,
+ 0x15, 0xf9, 0xda, 0xdb, 0xe4, 0x10, 0x1e, 0xc6,
+ 0x82, 0xaa, 0x03, 0x4c, 0x7c, 0xeb, 0xc5, 0x9c,
+ 0xfa, 0xea, 0x9e, 0xa9, 0x07, 0x6e, 0xde, 0x7f,
+ 0x4a, 0xf1, 0x52, 0xe8, 0xb2, 0xfa, 0x9c, 0xb6 } },
+
+ { "data-50 key-20",
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa",
+ { 0x88, 0x06, 0x26, 0x08, 0xd3, 0xe6, 0xad, 0x8a,
+ 0x0a, 0xa2, 0xac, 0xe0, 0x14, 0xc8, 0xa8, 0x6f,
+ 0x0a, 0xa6, 0x35, 0xd9, 0x47, 0xac, 0x9f, 0xeb,
+ 0xe8, 0x3e, 0xf4, 0xe5, 0x59, 0x66, 0x14, 0x4b,
+ 0x2a, 0x5a, 0xb3, 0x9d, 0xc1, 0x38, 0x14, 0xb9,
+ 0x4e, 0x3a, 0xb6, 0xe1, 0x01, 0xa3, 0x4f, 0x27 } },
+
+ { "data-50 key-26",
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd",
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+ "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
+ { 0x3e, 0x8a, 0x69, 0xb7, 0x78, 0x3c, 0x25, 0x85,
+ 0x19, 0x33, 0xab, 0x62, 0x90, 0xaf, 0x6c, 0xa7,
+ 0x7a, 0x99, 0x81, 0x48, 0x08, 0x50, 0x00, 0x9c,
+ 0xc5, 0x57, 0x7c, 0x6e, 0x1f, 0x57, 0x3b, 0x4e,
+ 0x68, 0x01, 0xdd, 0x23, 0xc4, 0xa7, 0xd6, 0x79,
+ 0xcc, 0xf8, 0xa3, 0x86, 0xc6, 0x74, 0xcf, 0xfb } },
+
+ { "data-54 key-131",
+ "Test Using Larger Than Block-Size Key - Hash Key First",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa",
+ { 0x4e, 0xce, 0x08, 0x44, 0x85, 0x81, 0x3e, 0x90,
+ 0x88, 0xd2, 0xc6, 0x3a, 0x04, 0x1b, 0xc5, 0xb4,
+ 0x4f, 0x9e, 0xf1, 0x01, 0x2a, 0x2b, 0x58, 0x8f,
+ 0x3c, 0xd1, 0x1f, 0x05, 0x03, 0x3a, 0xc4, 0xc6,
+ 0x0c, 0x2e, 0xf6, 0xab, 0x40, 0x30, 0xfe, 0x82,
+ 0x96, 0x24, 0x8d, 0xf1, 0x63, 0xf4, 0x49, 0x52 } },
+
+ { "data-152 key-131",
+ "This is a test using a larger than block-size key and a larger "
+ "than block-size data. The key needs to be hashed before being "
+ "used by the HMAC algorithm.",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa",
+ { 0x66, 0x17, 0x17, 0x8e, 0x94, 0x1f, 0x02, 0x0d,
+ 0x35, 0x1e, 0x2f, 0x25, 0x4e, 0x8f, 0xd3, 0x2c,
+ 0x60, 0x24, 0x20, 0xfe, 0xb0, 0xb8, 0xfb, 0x9a,
+ 0xdc, 0xce, 0xbb, 0x82, 0x46, 0x1e, 0x99, 0xc5,
+ 0xa6, 0x78, 0xcc, 0x31, 0xe7, 0x99, 0x17, 0x6d,
+ 0x38, 0x60, 0xe6, 0x11, 0x0c, 0x46, 0x52, 0x3e } },
+
+ { NULL }
+ };
+ const char *what;
+ const char *errtxt;
+ int tvidx;
+
+ for (tvidx=0; tv[tvidx].desc; tvidx++)
+ {
+ what = tv[tvidx].desc;
+ errtxt = check_one (GCRY_MD_SHA384,
+ tv[tvidx].data, strlen (tv[tvidx].data),
+ tv[tvidx].key, strlen (tv[tvidx].key),
+ tv[tvidx].expect, DIM (tv[tvidx].expect), 0);
+ if (errtxt)
+ goto failed;
+ if (!extended)
+ break;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("hmac", GCRY_MD_SHA384, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+static gpg_err_code_t
+selftests_sha512 (int extended, selftest_report_func_t report)
+{
+ static struct
+ {
+ const char * const desc;
+ const char * const data;
+ const char * const key;
+ const char expect[64];
+ } tv[] =
+ {
+ { "data-28 key-4",
+ "what do ya want for nothing?",
+ "Jefe",
+ { 0x16, 0x4b, 0x7a, 0x7b, 0xfc, 0xf8, 0x19, 0xe2,
+ 0xe3, 0x95, 0xfb, 0xe7, 0x3b, 0x56, 0xe0, 0xa3,
+ 0x87, 0xbd, 0x64, 0x22, 0x2e, 0x83, 0x1f, 0xd6,
+ 0x10, 0x27, 0x0c, 0xd7, 0xea, 0x25, 0x05, 0x54,
+ 0x97, 0x58, 0xbf, 0x75, 0xc0, 0x5a, 0x99, 0x4a,
+ 0x6d, 0x03, 0x4f, 0x65, 0xf8, 0xf0, 0xe6, 0xfd,
+ 0xca, 0xea, 0xb1, 0xa3, 0x4d, 0x4a, 0x6b, 0x4b,
+ 0x63, 0x6e, 0x07, 0x0a, 0x38, 0xbc, 0xe7, 0x37 } },
+
+ { "data-9 key-20",
+ "Hi There",
+ "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
+ "\x0b\x0b\x0b\x0b",
+ { 0x87, 0xaa, 0x7c, 0xde, 0xa5, 0xef, 0x61, 0x9d,
+ 0x4f, 0xf0, 0xb4, 0x24, 0x1a, 0x1d, 0x6c, 0xb0,
+ 0x23, 0x79, 0xf4, 0xe2, 0xce, 0x4e, 0xc2, 0x78,
+ 0x7a, 0xd0, 0xb3, 0x05, 0x45, 0xe1, 0x7c, 0xde,
+ 0xda, 0xa8, 0x33, 0xb7, 0xd6, 0xb8, 0xa7, 0x02,
+ 0x03, 0x8b, 0x27, 0x4e, 0xae, 0xa3, 0xf4, 0xe4,
+ 0xbe, 0x9d, 0x91, 0x4e, 0xeb, 0x61, 0xf1, 0x70,
+ 0x2e, 0x69, 0x6c, 0x20, 0x3a, 0x12, 0x68, 0x54 } },
+
+ { "data-50 key-20",
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa",
+ { 0xfa, 0x73, 0xb0, 0x08, 0x9d, 0x56, 0xa2, 0x84,
+ 0xef, 0xb0, 0xf0, 0x75, 0x6c, 0x89, 0x0b, 0xe9,
+ 0xb1, 0xb5, 0xdb, 0xdd, 0x8e, 0xe8, 0x1a, 0x36,
+ 0x55, 0xf8, 0x3e, 0x33, 0xb2, 0x27, 0x9d, 0x39,
+ 0xbf, 0x3e, 0x84, 0x82, 0x79, 0xa7, 0x22, 0xc8,
+ 0x06, 0xb4, 0x85, 0xa4, 0x7e, 0x67, 0xc8, 0x07,
+ 0xb9, 0x46, 0xa3, 0x37, 0xbe, 0xe8, 0x94, 0x26,
+ 0x74, 0x27, 0x88, 0x59, 0xe1, 0x32, 0x92, 0xfb } },
+
+ { "data-50 key-26",
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd",
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+ "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
+ { 0xb0, 0xba, 0x46, 0x56, 0x37, 0x45, 0x8c, 0x69,
+ 0x90, 0xe5, 0xa8, 0xc5, 0xf6, 0x1d, 0x4a, 0xf7,
+ 0xe5, 0x76, 0xd9, 0x7f, 0xf9, 0x4b, 0x87, 0x2d,
+ 0xe7, 0x6f, 0x80, 0x50, 0x36, 0x1e, 0xe3, 0xdb,
+ 0xa9, 0x1c, 0xa5, 0xc1, 0x1a, 0xa2, 0x5e, 0xb4,
+ 0xd6, 0x79, 0x27, 0x5c, 0xc5, 0x78, 0x80, 0x63,
+ 0xa5, 0xf1, 0x97, 0x41, 0x12, 0x0c, 0x4f, 0x2d,
+ 0xe2, 0xad, 0xeb, 0xeb, 0x10, 0xa2, 0x98, 0xdd } },
+
+ { "data-54 key-131",
+ "Test Using Larger Than Block-Size Key - Hash Key First",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa",
+ { 0x80, 0xb2, 0x42, 0x63, 0xc7, 0xc1, 0xa3, 0xeb,
+ 0xb7, 0x14, 0x93, 0xc1, 0xdd, 0x7b, 0xe8, 0xb4,
+ 0x9b, 0x46, 0xd1, 0xf4, 0x1b, 0x4a, 0xee, 0xc1,
+ 0x12, 0x1b, 0x01, 0x37, 0x83, 0xf8, 0xf3, 0x52,
+ 0x6b, 0x56, 0xd0, 0x37, 0xe0, 0x5f, 0x25, 0x98,
+ 0xbd, 0x0f, 0xd2, 0x21, 0x5d, 0x6a, 0x1e, 0x52,
+ 0x95, 0xe6, 0x4f, 0x73, 0xf6, 0x3f, 0x0a, 0xec,
+ 0x8b, 0x91, 0x5a, 0x98, 0x5d, 0x78, 0x65, 0x98 } },
+
+ { "data-152 key-131",
+ "This is a test using a larger than block-size key and a larger "
+ "than block-size data. The key needs to be hashed before being "
+ "used by the HMAC algorithm.",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa",
+ { 0xe3, 0x7b, 0x6a, 0x77, 0x5d, 0xc8, 0x7d, 0xba,
+ 0xa4, 0xdf, 0xa9, 0xf9, 0x6e, 0x5e, 0x3f, 0xfd,
+ 0xde, 0xbd, 0x71, 0xf8, 0x86, 0x72, 0x89, 0x86,
+ 0x5d, 0xf5, 0xa3, 0x2d, 0x20, 0xcd, 0xc9, 0x44,
+ 0xb6, 0x02, 0x2c, 0xac, 0x3c, 0x49, 0x82, 0xb1,
+ 0x0d, 0x5e, 0xeb, 0x55, 0xc3, 0xe4, 0xde, 0x15,
+ 0x13, 0x46, 0x76, 0xfb, 0x6d, 0xe0, 0x44, 0x60,
+ 0x65, 0xc9, 0x74, 0x40, 0xfa, 0x8c, 0x6a, 0x58 } },
+
+ { NULL }
+ };
+ const char *what;
+ const char *errtxt;
+ int tvidx;
+
+ for (tvidx=0; tv[tvidx].desc; tvidx++)
+ {
+ what = tv[tvidx].desc;
+ errtxt = check_one (GCRY_MD_SHA512,
+ tv[tvidx].data, strlen (tv[tvidx].data),
+ tv[tvidx].key, strlen (tv[tvidx].key),
+ tv[tvidx].expect, DIM (tv[tvidx].expect), 0);
+ if (errtxt)
+ goto failed;
+ if (!extended)
+ break;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("hmac", GCRY_MD_SHA512, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+
+/* Test for the SHA3 algorithms. Vectors taken on 2017-07-18 from
+ * http://www.wolfgang-ehrhardt.de/hmac-sha3-testvectors.html */
+static gpg_err_code_t
+selftests_sha3 (int hashalgo, int extended, selftest_report_func_t report)
+{
+ static struct
+ {
+ const char * const desc;
+ const char * const data;
+ const char * const key;
+ const char expect_224[28];
+ const char expect_256[32];
+ const char expect_384[48];
+ const char expect_512[64];
+ unsigned char trunc;
+ } tv[] =
+ {
+ { "data-9 key-20", /* Test 1 */
+ "Hi There",
+ "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
+ "\x0b\x0b\x0b\x0b",
+
+ { 0x3b, 0x16, 0x54, 0x6b, 0xbc, 0x7b, 0xe2, 0x70,
+ 0x6a, 0x03, 0x1d, 0xca, 0xfd, 0x56, 0x37, 0x3d,
+ 0x98, 0x84, 0x36, 0x76, 0x41, 0xd8, 0xc5, 0x9a,
+ 0xf3, 0xc8, 0x60, 0xf7 },
+ { 0xba, 0x85, 0x19, 0x23, 0x10, 0xdf, 0xfa, 0x96,
+ 0xe2, 0xa3, 0xa4, 0x0e, 0x69, 0x77, 0x43, 0x51,
+ 0x14, 0x0b, 0xb7, 0x18, 0x5e, 0x12, 0x02, 0xcd,
+ 0xcc, 0x91, 0x75, 0x89, 0xf9, 0x5e, 0x16, 0xbb },
+ { 0x68, 0xd2, 0xdc, 0xf7, 0xfd, 0x4d, 0xdd, 0x0a,
+ 0x22, 0x40, 0xc8, 0xa4, 0x37, 0x30, 0x5f, 0x61,
+ 0xfb, 0x73, 0x34, 0xcf, 0xb5, 0xd0, 0x22, 0x6e,
+ 0x1b, 0xc2, 0x7d, 0xc1, 0x0a, 0x2e, 0x72, 0x3a,
+ 0x20, 0xd3, 0x70, 0xb4, 0x77, 0x43, 0x13, 0x0e,
+ 0x26, 0xac, 0x7e, 0x3d, 0x53, 0x28, 0x86, 0xbd },
+ { 0xeb, 0x3f, 0xbd, 0x4b, 0x2e, 0xaa, 0xb8, 0xf5,
+ 0xc5, 0x04, 0xbd, 0x3a, 0x41, 0x46, 0x5a, 0xac,
+ 0xec, 0x15, 0x77, 0x0a, 0x7c, 0xab, 0xac, 0x53,
+ 0x1e, 0x48, 0x2f, 0x86, 0x0b, 0x5e, 0xc7, 0xba,
+ 0x47, 0xcc, 0xb2, 0xc6, 0xf2, 0xaf, 0xce, 0x8f,
+ 0x88, 0xd2, 0x2b, 0x6d, 0xc6, 0x13, 0x80, 0xf2,
+ 0x3a, 0x66, 0x8f, 0xd3, 0x88, 0x8b, 0xb8, 0x05,
+ 0x37, 0xc0, 0xa0, 0xb8, 0x64, 0x07, 0x68, 0x9e }
+ },
+
+ { "data-28 key-4", /* Test 2 */
+ /* Test with a key shorter than the length of the HMAC output. */
+ "what do ya want for nothing?",
+ "Jefe",
+
+ { 0x7f, 0xdb, 0x8d, 0xd8, 0x8b, 0xd2, 0xf6, 0x0d,
+ 0x1b, 0x79, 0x86, 0x34, 0xad, 0x38, 0x68, 0x11,
+ 0xc2, 0xcf, 0xc8, 0x5b, 0xfa, 0xf5, 0xd5, 0x2b,
+ 0xba, 0xce, 0x5e, 0x66 },
+ { 0xc7, 0xd4, 0x07, 0x2e, 0x78, 0x88, 0x77, 0xae,
+ 0x35, 0x96, 0xbb, 0xb0, 0xda, 0x73, 0xb8, 0x87,
+ 0xc9, 0x17, 0x1f, 0x93, 0x09, 0x5b, 0x29, 0x4a,
+ 0xe8, 0x57, 0xfb, 0xe2, 0x64, 0x5e, 0x1b, 0xa5 },
+ { 0xf1, 0x10, 0x1f, 0x8c, 0xbf, 0x97, 0x66, 0xfd,
+ 0x67, 0x64, 0xd2, 0xed, 0x61, 0x90, 0x3f, 0x21,
+ 0xca, 0x9b, 0x18, 0xf5, 0x7c, 0xf3, 0xe1, 0xa2,
+ 0x3c, 0xa1, 0x35, 0x08, 0xa9, 0x32, 0x43, 0xce,
+ 0x48, 0xc0, 0x45, 0xdc, 0x00, 0x7f, 0x26, 0xa2,
+ 0x1b, 0x3f, 0x5e, 0x0e, 0x9d, 0xf4, 0xc2, 0x0a },
+ { 0x5a, 0x4b, 0xfe, 0xab, 0x61, 0x66, 0x42, 0x7c,
+ 0x7a, 0x36, 0x47, 0xb7, 0x47, 0x29, 0x2b, 0x83,
+ 0x84, 0x53, 0x7c, 0xdb, 0x89, 0xaf, 0xb3, 0xbf,
+ 0x56, 0x65, 0xe4, 0xc5, 0xe7, 0x09, 0x35, 0x0b,
+ 0x28, 0x7b, 0xae, 0xc9, 0x21, 0xfd, 0x7c, 0xa0,
+ 0xee, 0x7a, 0x0c, 0x31, 0xd0, 0x22, 0xa9, 0x5e,
+ 0x1f, 0xc9, 0x2b, 0xa9, 0xd7, 0x7d, 0xf8, 0x83,
+ 0x96, 0x02, 0x75, 0xbe, 0xb4, 0xe6, 0x20, 0x24 }
+ },
+
+ { "data-50 key-20", /* Test 3 */
+ /* Test with a combined length of key and data that is larger
+ * than 64 bytes (= block-size of SHA-224 and SHA-256). */
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa",
+
+ { 0x67, 0x6c, 0xfc, 0x7d, 0x16, 0x15, 0x36, 0x38,
+ 0x78, 0x03, 0x90, 0x69, 0x2b, 0xe1, 0x42, 0xd2,
+ 0xdf, 0x7c, 0xe9, 0x24, 0xb9, 0x09, 0xc0, 0xc0,
+ 0x8d, 0xbf, 0xdc, 0x1a },
+ { 0x84, 0xec, 0x79, 0x12, 0x4a, 0x27, 0x10, 0x78,
+ 0x65, 0xce, 0xdd, 0x8b, 0xd8, 0x2d, 0xa9, 0x96,
+ 0x5e, 0x5e, 0xd8, 0xc3, 0x7b, 0x0a, 0xc9, 0x80,
+ 0x05, 0xa7, 0xf3, 0x9e, 0xd5, 0x8a, 0x42, 0x07 },
+ { 0x27, 0x5c, 0xd0, 0xe6, 0x61, 0xbb, 0x8b, 0x15,
+ 0x1c, 0x64, 0xd2, 0x88, 0xf1, 0xf7, 0x82, 0xfb,
+ 0x91, 0xa8, 0xab, 0xd5, 0x68, 0x58, 0xd7, 0x2b,
+ 0xab, 0xb2, 0xd4, 0x76, 0xf0, 0x45, 0x83, 0x73,
+ 0xb4, 0x1b, 0x6a, 0xb5, 0xbf, 0x17, 0x4b, 0xec,
+ 0x42, 0x2e, 0x53, 0xfc, 0x31, 0x35, 0xac, 0x6e },
+ { 0x30, 0x9e, 0x99, 0xf9, 0xec, 0x07, 0x5e, 0xc6,
+ 0xc6, 0xd4, 0x75, 0xed, 0xa1, 0x18, 0x06, 0x87,
+ 0xfc, 0xf1, 0x53, 0x11, 0x95, 0x80, 0x2a, 0x99,
+ 0xb5, 0x67, 0x74, 0x49, 0xa8, 0x62, 0x51, 0x82,
+ 0x85, 0x1c, 0xb3, 0x32, 0xaf, 0xb6, 0xa8, 0x9c,
+ 0x41, 0x13, 0x25, 0xfb, 0xcb, 0xcd, 0x42, 0xaf,
+ 0xcb, 0x7b, 0x6e, 0x5a, 0xab, 0x7e, 0xa4, 0x2c,
+ 0x66, 0x0f, 0x97, 0xfd, 0x85, 0x84, 0xbf, 0x03 }
+ },
+
+ { "data-50 key-25", /* Test 4 */
+ /* Test with a combined length of key and data that is larger
+ * than 64 bytes (= block-size of SHA-224 and SHA-256). */
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+ "\xcd\xcd",
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+ "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
+
+ { 0xa9, 0xd7, 0x68, 0x5a, 0x19, 0xc4, 0xe0, 0xdb,
+ 0xd9, 0xdf, 0x25, 0x56, 0xcc, 0x8a, 0x7d, 0x2a,
+ 0x77, 0x33, 0xb6, 0x76, 0x25, 0xce, 0x59, 0x4c,
+ 0x78, 0x27, 0x0e, 0xeb },
+ { 0x57, 0x36, 0x6a, 0x45, 0xe2, 0x30, 0x53, 0x21,
+ 0xa4, 0xbc, 0x5a, 0xa5, 0xfe, 0x2e, 0xf8, 0xa9,
+ 0x21, 0xf6, 0xaf, 0x82, 0x73, 0xd7, 0xfe, 0x7b,
+ 0xe6, 0xcf, 0xed, 0xb3, 0xf0, 0xae, 0xa6, 0xd7 },
+ { 0x3a, 0x5d, 0x7a, 0x87, 0x97, 0x02, 0xc0, 0x86,
+ 0xbc, 0x96, 0xd1, 0xdd, 0x8a, 0xa1, 0x5d, 0x9c,
+ 0x46, 0x44, 0x6b, 0x95, 0x52, 0x13, 0x11, 0xc6,
+ 0x06, 0xfd, 0xc4, 0xe3, 0x08, 0xf4, 0xb9, 0x84,
+ 0xda, 0x2d, 0x0f, 0x94, 0x49, 0xb3, 0xba, 0x84,
+ 0x25, 0xec, 0x7f, 0xb8, 0xc3, 0x1b, 0xc1, 0x36 },
+ { 0xb2, 0x7e, 0xab, 0x1d, 0x6e, 0x8d, 0x87, 0x46,
+ 0x1c, 0x29, 0xf7, 0xf5, 0x73, 0x9d, 0xd5, 0x8e,
+ 0x98, 0xaa, 0x35, 0xf8, 0xe8, 0x23, 0xad, 0x38,
+ 0xc5, 0x49, 0x2a, 0x20, 0x88, 0xfa, 0x02, 0x81,
+ 0x99, 0x3b, 0xbf, 0xff, 0x9a, 0x0e, 0x9c, 0x6b,
+ 0xf1, 0x21, 0xae, 0x9e, 0xc9, 0xbb, 0x09, 0xd8,
+ 0x4a, 0x5e, 0xba, 0xc8, 0x17, 0x18, 0x2e, 0xa9,
+ 0x74, 0x67, 0x3f, 0xb1, 0x33, 0xca, 0x0d, 0x1d }
+ },
+
+ { "data-20 key-20 trunc", /* Test 5 */
+ /* Test with a truncation of output to 128 bits. */
+ "Test With Truncation",
+ "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c"
+ "\x0c\x0c\x0c\x0c",
+
+ { 0x49, 0xfd, 0xd3, 0xab, 0xd0, 0x05, 0xeb, 0xb8,
+ 0xae, 0x63, 0xfe, 0xa9, 0x46, 0xd1, 0x88, 0x3c },
+ { 0x6e, 0x02, 0xc6, 0x45, 0x37, 0xfb, 0x11, 0x80,
+ 0x57, 0xab, 0xb7, 0xfb, 0x66, 0xa2, 0x3b, 0x3c },
+ { 0x47, 0xc5, 0x1a, 0xce, 0x1f, 0xfa, 0xcf, 0xfd,
+ 0x74, 0x94, 0x72, 0x46, 0x82, 0x61, 0x57, 0x83 },
+ { 0x0f, 0xa7, 0x47, 0x59, 0x48, 0xf4, 0x3f, 0x48,
+ 0xca, 0x05, 0x16, 0x67, 0x1e, 0x18, 0x97, 0x8c },
+ 16
+ },
+
+ { "data-54 key-131", /* Test 6 */
+ /* Test with a key larger than 128 bytes (= block-size of
+ * SHA-384 and SHA-512). */
+ "Test Using Larger Than Block-Size Key - Hash Key First",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa",
+
+ { 0xb4, 0xa1, 0xf0, 0x4c, 0x00, 0x28, 0x7a, 0x9b,
+ 0x7f, 0x60, 0x75, 0xb3, 0x13, 0xd2, 0x79, 0xb8,
+ 0x33, 0xbc, 0x8f, 0x75, 0x12, 0x43, 0x52, 0xd0,
+ 0x5f, 0xb9, 0x99, 0x5f },
+ { 0xed, 0x73, 0xa3, 0x74, 0xb9, 0x6c, 0x00, 0x52,
+ 0x35, 0xf9, 0x48, 0x03, 0x2f, 0x09, 0x67, 0x4a,
+ 0x58, 0xc0, 0xce, 0x55, 0x5c, 0xfc, 0x1f, 0x22,
+ 0x3b, 0x02, 0x35, 0x65, 0x60, 0x31, 0x2c, 0x3b },
+ { 0x0f, 0xc1, 0x95, 0x13, 0xbf, 0x6b, 0xd8, 0x78,
+ 0x03, 0x70, 0x16, 0x70, 0x6a, 0x0e, 0x57, 0xbc,
+ 0x52, 0x81, 0x39, 0x83, 0x6b, 0x9a, 0x42, 0xc3,
+ 0xd4, 0x19, 0xe4, 0x98, 0xe0, 0xe1, 0xfb, 0x96,
+ 0x16, 0xfd, 0x66, 0x91, 0x38, 0xd3, 0x3a, 0x11,
+ 0x05, 0xe0, 0x7c, 0x72, 0xb6, 0x95, 0x3b, 0xcc },
+ { 0x00, 0xf7, 0x51, 0xa9, 0xe5, 0x06, 0x95, 0xb0,
+ 0x90, 0xed, 0x69, 0x11, 0xa4, 0xb6, 0x55, 0x24,
+ 0x95, 0x1c, 0xdc, 0x15, 0xa7, 0x3a, 0x5d, 0x58,
+ 0xbb, 0x55, 0x21, 0x5e, 0xa2, 0xcd, 0x83, 0x9a,
+ 0xc7, 0x9d, 0x2b, 0x44, 0xa3, 0x9b, 0xaf, 0xab,
+ 0x27, 0xe8, 0x3f, 0xde, 0x9e, 0x11, 0xf6, 0x34,
+ 0x0b, 0x11, 0xd9, 0x91, 0xb1, 0xb9, 0x1b, 0xf2,
+ 0xee, 0xe7, 0xfc, 0x87, 0x24, 0x26, 0xc3, 0xa4 }
+ },
+
+ { "data-54 key-147", /* Test 6a */
+ /* Test with a key larger than 144 bytes (= block-size of
+ * SHA3-224). */
+ "Test Using Larger Than Block-Size Key - Hash Key First",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa",
+
+ { 0xb9, 0x6d, 0x73, 0x0c, 0x14, 0x8c, 0x2d, 0xaa,
+ 0xd8, 0x64, 0x9d, 0x83, 0xde, 0xfa, 0xa3, 0x71,
+ 0x97, 0x38, 0xd3, 0x47, 0x75, 0x39, 0x7b, 0x75,
+ 0x71, 0xc3, 0x85, 0x15 },
+ { 0xa6, 0x07, 0x2f, 0x86, 0xde, 0x52, 0xb3, 0x8b,
+ 0xb3, 0x49, 0xfe, 0x84, 0xcd, 0x6d, 0x97, 0xfb,
+ 0x6a, 0x37, 0xc4, 0xc0, 0xf6, 0x2a, 0xae, 0x93,
+ 0x98, 0x11, 0x93, 0xa7, 0x22, 0x9d, 0x34, 0x67 },
+ { 0x71, 0x3d, 0xff, 0x03, 0x02, 0xc8, 0x50, 0x86,
+ 0xec, 0x5a, 0xd0, 0x76, 0x8d, 0xd6, 0x5a, 0x13,
+ 0xdd, 0xd7, 0x90, 0x68, 0xd8, 0xd4, 0xc6, 0x21,
+ 0x2b, 0x71, 0x2e, 0x41, 0x64, 0x94, 0x49, 0x11,
+ 0x14, 0x80, 0x23, 0x00, 0x44, 0x18, 0x5a, 0x99,
+ 0x10, 0x3e, 0xd8, 0x20, 0x04, 0xdd, 0xbf, 0xcc },
+ { 0xb1, 0x48, 0x35, 0xc8, 0x19, 0xa2, 0x90, 0xef,
+ 0xb0, 0x10, 0xac, 0xe6, 0xd8, 0x56, 0x8d, 0xc6,
+ 0xb8, 0x4d, 0xe6, 0x0b, 0xc4, 0x9b, 0x00, 0x4c,
+ 0x3b, 0x13, 0xed, 0xa7, 0x63, 0x58, 0x94, 0x51,
+ 0xe5, 0xdd, 0x74, 0x29, 0x28, 0x84, 0xd1, 0xbd,
+ 0xce, 0x64, 0xe6, 0xb9, 0x19, 0xdd, 0x61, 0xdc,
+ 0x9c, 0x56, 0xa2, 0x82, 0xa8, 0x1c, 0x0b, 0xd1,
+ 0x4f, 0x1f, 0x36, 0x5b, 0x49, 0xb8, 0x3a, 0x5b }
+ },
+
+ { "data-152 key-131", /* Test 7 */
+ /* Test with a key and data that is larger than 128 bytes (=
+ * block-size of SHA-384 and SHA-512). */
+ "This is a test using a larger than block-size key and a larger "
+ "than block-size data. The key needs to be hashed before being "
+ "used by the HMAC algorithm.",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa",
+
+ { 0x05, 0xd8, 0xcd, 0x6d, 0x00, 0xfa, 0xea, 0x8d,
+ 0x1e, 0xb6, 0x8a, 0xde, 0x28, 0x73, 0x0b, 0xbd,
+ 0x3c, 0xba, 0xb6, 0x92, 0x9f, 0x0a, 0x08, 0x6b,
+ 0x29, 0xcd, 0x62, 0xa0 },
+ { 0x65, 0xc5, 0xb0, 0x6d, 0x4c, 0x3d, 0xe3, 0x2a,
+ 0x7a, 0xef, 0x87, 0x63, 0x26, 0x1e, 0x49, 0xad,
+ 0xb6, 0xe2, 0x29, 0x3e, 0xc8, 0xe7, 0xc6, 0x1e,
+ 0x8d, 0xe6, 0x17, 0x01, 0xfc, 0x63, 0xe1, 0x23 },
+ { 0x02, 0x6f, 0xdf, 0x6b, 0x50, 0x74, 0x1e, 0x37,
+ 0x38, 0x99, 0xc9, 0xf7, 0xd5, 0x40, 0x6d, 0x4e,
+ 0xb0, 0x9f, 0xc6, 0x66, 0x56, 0x36, 0xfc, 0x1a,
+ 0x53, 0x00, 0x29, 0xdd, 0xf5, 0xcf, 0x3c, 0xa5,
+ 0xa9, 0x00, 0xed, 0xce, 0x01, 0xf5, 0xf6, 0x1e,
+ 0x2f, 0x40, 0x8c, 0xdf, 0x2f, 0xd3, 0xe7, 0xe8 },
+ { 0x38, 0xa4, 0x56, 0xa0, 0x04, 0xbd, 0x10, 0xd3,
+ 0x2c, 0x9a, 0xb8, 0x33, 0x66, 0x84, 0x11, 0x28,
+ 0x62, 0xc3, 0xdb, 0x61, 0xad, 0xcc, 0xa3, 0x18,
+ 0x29, 0x35, 0x5e, 0xaf, 0x46, 0xfd, 0x5c, 0x73,
+ 0xd0, 0x6a, 0x1f, 0x0d, 0x13, 0xfe, 0xc9, 0xa6,
+ 0x52, 0xfb, 0x38, 0x11, 0xb5, 0x77, 0xb1, 0xb1,
+ 0xd1, 0xb9, 0x78, 0x9f, 0x97, 0xae, 0x5b, 0x83,
+ 0xc6, 0xf4, 0x4d, 0xfc, 0xf1, 0xd6, 0x7e, 0xba }
+ },
+
+ { "data-152 key-147", /* Test 7a */
+ /* Test with a key larger than 144 bytes (= block-size of
+ * SHA3-224). */
+ "This is a test using a larger than block-size key and a larger "
+ "than block-size data. The key needs to be hashed before being "
+ "used by the HMAC algorithm.",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa",
+
+ { 0xc7, 0x9c, 0x9b, 0x09, 0x34, 0x24, 0xe5, 0x88,
+ 0xa9, 0x87, 0x8b, 0xbc, 0xb0, 0x89, 0xe0, 0x18,
+ 0x27, 0x00, 0x96, 0xe9, 0xb4, 0xb1, 0xa9, 0xe8,
+ 0x22, 0x0c, 0x86, 0x6a },
+ { 0xe6, 0xa3, 0x6d, 0x9b, 0x91, 0x5f, 0x86, 0xa0,
+ 0x93, 0xca, 0xc7, 0xd1, 0x10, 0xe9, 0xe0, 0x4c,
+ 0xf1, 0xd6, 0x10, 0x0d, 0x30, 0x47, 0x55, 0x09,
+ 0xc2, 0x47, 0x5f, 0x57, 0x1b, 0x75, 0x8b, 0x5a },
+ { 0xca, 0xd1, 0x8a, 0x8f, 0xf6, 0xc4, 0xcc, 0x3a,
+ 0xd4, 0x87, 0xb9, 0x5f, 0x97, 0x69, 0xe9, 0xb6,
+ 0x1c, 0x06, 0x2a, 0xef, 0xd6, 0x95, 0x25, 0x69,
+ 0xe6, 0xe6, 0x42, 0x18, 0x97, 0x05, 0x4c, 0xfc,
+ 0x70, 0xb5, 0xfd, 0xc6, 0x60, 0x5c, 0x18, 0x45,
+ 0x71, 0x12, 0xfc, 0x6a, 0xaa, 0xd4, 0x55, 0x85 },
+ { 0xdc, 0x03, 0x0e, 0xe7, 0x88, 0x70, 0x34, 0xf3,
+ 0x2c, 0xf4, 0x02, 0xdf, 0x34, 0x62, 0x2f, 0x31,
+ 0x1f, 0x3e, 0x6c, 0xf0, 0x48, 0x60, 0xc6, 0xbb,
+ 0xd7, 0xfa, 0x48, 0x86, 0x74, 0x78, 0x2b, 0x46,
+ 0x59, 0xfd, 0xbd, 0xf3, 0xfd, 0x87, 0x78, 0x52,
+ 0x88, 0x5c, 0xfe, 0x6e, 0x22, 0x18, 0x5f, 0xe7,
+ 0xb2, 0xee, 0x95, 0x20, 0x43, 0x62, 0x9b, 0xc9,
+ 0xd5, 0xf3, 0x29, 0x8a, 0x41, 0xd0, 0x2c, 0x66 }
+ }/*,*/
+
+      /* Our API does not allow specifying a bit count and thus we
+       * can't use the following test. */
+ /* { "data-5bit key-4", /\* Test 8 *\/ */
+ /* /\* Test with data bit size no multiple of 8, the data bits are */
+ /* * '11001' from the NIST example using SHA-3 order (= 5 bits */
+ /* * from LSB hex byte 13 or 5 bits from MSB hex byte c8). *\/ */
+ /* "\xc8", */
+ /* "Jefe", */
+
+ /* { 0x5f, 0x8c, 0x0e, 0xa7, 0xfa, 0xfe, 0xcd, 0x0c, */
+ /* 0x34, 0x63, 0xaa, 0xd0, 0x97, 0x42, 0xce, 0xce, */
+ /* 0xb1, 0x42, 0xfe, 0x0a, 0xb6, 0xf4, 0x53, 0x94, */
+ /* 0x38, 0xc5, 0x9d, 0xe8 }, */
+ /* { 0xec, 0x82, 0x22, 0x77, 0x3f, 0xac, 0x68, 0xb3, */
+ /* 0xd3, 0xdc, 0xb1, 0x82, 0xae, 0xc8, 0xb0, 0x50, */
+ /* 0x7a, 0xce, 0x44, 0x48, 0xd2, 0x0a, 0x11, 0x47, */
+ /* 0xe6, 0x82, 0x11, 0x8d, 0xa4, 0xe3, 0xf4, 0x4c }, */
+ /* { 0x21, 0xfb, 0xd3, 0xbf, 0x3e, 0xbb, 0xa3, 0xcf, */
+ /* 0xc9, 0xef, 0x64, 0xc0, 0x59, 0x1c, 0x92, 0xc5, */
+ /* 0xac, 0xb2, 0x65, 0xe9, 0x2d, 0x87, 0x61, 0xd1, */
+ /* 0xf9, 0x1a, 0x52, 0xa1, 0x03, 0xa6, 0xc7, 0x96, */
+ /* 0x94, 0xcf, 0xd6, 0x7a, 0x9a, 0x2a, 0xc1, 0x32, */
+ /* 0x4f, 0x02, 0xfe, 0xa6, 0x3b, 0x81, 0xef, 0xfc }, */
+ /* { 0x27, 0xf9, 0x38, 0x8c, 0x15, 0x67, 0xef, 0x4e, */
+ /* 0xf2, 0x00, 0x60, 0x2a, 0x6c, 0xf8, 0x71, 0xd6, */
+ /* 0x8a, 0x6f, 0xb0, 0x48, 0xd4, 0x73, 0x7a, 0xc4, */
+ /* 0x41, 0x8a, 0x2f, 0x02, 0x12, 0x89, 0xd1, 0x3d, */
+ /* 0x1f, 0xd1, 0x12, 0x0f, 0xec, 0xb9, 0xcf, 0x96, */
+ /* 0x4c, 0x5b, 0x11, 0x7a, 0xb5, 0xb1, 0x1c, 0x61, */
+ /* 0x4b, 0x2d, 0xa3, 0x9d, 0xad, 0xd5, 0x1f, 0x2f, */
+ /* 0x5e, 0x22, 0xaa, 0xcc, 0xec, 0x7d, 0x57, 0x6e } */
+ /* } */
+
+ };
+ const char *what;
+ const char *errtxt;
+ int tvidx;
+ const char *expect;
+ int nexpect;
+
+ for (tvidx=0; tvidx < DIM(tv); tvidx++)
+ {
+ what = tv[tvidx].desc;
+ if (hashalgo == GCRY_MD_SHA3_224)
+ {
+ expect = tv[tvidx].expect_224;
+ nexpect = DIM (tv[tvidx].expect_224);
+ }
+ else if (hashalgo == GCRY_MD_SHA3_256)
+ {
+ expect = tv[tvidx].expect_256;
+ nexpect = DIM (tv[tvidx].expect_256);
+ }
+ else if (hashalgo == GCRY_MD_SHA3_384)
+ {
+ expect = tv[tvidx].expect_384;
+ nexpect = DIM (tv[tvidx].expect_384);
+ }
+ else if (hashalgo == GCRY_MD_SHA3_512)
+ {
+ expect = tv[tvidx].expect_512;
+ nexpect = DIM (tv[tvidx].expect_512);
+ }
+ else
+ BUG();
+
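+      /* Honor the truncation length when the test vector specifies one. */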
+ if (tv[tvidx].trunc && tv[tvidx].trunc < nexpect)
+ nexpect = tv[tvidx].trunc;
+
+ errtxt = check_one (hashalgo,
+ tv[tvidx].data, strlen (tv[tvidx].data),
+ tv[tvidx].key, strlen (tv[tvidx].key),
+ expect, nexpect, !!tv[tvidx].trunc);
+ if (errtxt)
+ goto failed;
+ if (!extended)
+ break;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("hmac", hashalgo, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+static gpg_err_code_t
+hmac_selftest (int algo, int extended, selftest_report_func_t report)
+{
+ gpg_err_code_t ec;
+
+ switch (algo)
+ {
+ case GCRY_MAC_HMAC_SHA1:
+ ec = selftests_sha1 (extended, report);
+ break;
+ case GCRY_MAC_HMAC_SHA224:
+ ec = selftests_sha224 (extended, report);
+ break;
+ case GCRY_MAC_HMAC_SHA256:
+ ec = selftests_sha256 (extended, report);
+ break;
+ case GCRY_MAC_HMAC_SHA384:
+ ec = selftests_sha384 (extended, report);
+ break;
+ case GCRY_MAC_HMAC_SHA512:
+ ec = selftests_sha512 (extended, report);
+ break;
+
+ case GCRY_MAC_HMAC_SHA3_224:
+ case GCRY_MAC_HMAC_SHA3_256:
+ case GCRY_MAC_HMAC_SHA3_384:
+ case GCRY_MAC_HMAC_SHA3_512:
+ {
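+        /* The SHA-3 based HMACs share one selftest routine which is
+         * parameterized by the underlying digest algorithm. */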
+ int md_algo = map_mac_algo_to_md (algo);
+ ec = selftests_sha3 (md_algo, extended, report);
+ }
+ break;
+
+ default:
+ ec = GPG_ERR_MAC_ALGO;
+ break;
+ }
+
+ return ec;
+}
+
+
+static const gcry_mac_spec_ops_t hmac_ops = {
+ hmac_open,
+ hmac_close,
+ hmac_setkey,
+ NULL,
+ hmac_reset,
+ hmac_write,
+ hmac_read,
+ hmac_verify,
+ hmac_get_maclen,
+ hmac_get_keylen,
+ NULL,
+ hmac_selftest
+};
+
+
+#if USE_SHA1
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha1 = {
+ GCRY_MAC_HMAC_SHA1, {0, 1}, "HMAC_SHA1",
+ &hmac_ops
+};
+#endif
+#if USE_SHA256
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha256 = {
+ GCRY_MAC_HMAC_SHA256, {0, 1}, "HMAC_SHA256",
+ &hmac_ops
+};
+
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha224 = {
+ GCRY_MAC_HMAC_SHA224, {0, 1}, "HMAC_SHA224",
+ &hmac_ops
+};
+#endif
+#if USE_SHA512
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512 = {
+ GCRY_MAC_HMAC_SHA512, {0, 1}, "HMAC_SHA512",
+ &hmac_ops
+};
+
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha384 = {
+ GCRY_MAC_HMAC_SHA384, {0, 1}, "HMAC_SHA384",
+ &hmac_ops
+};
+
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512_256 = {
+ GCRY_MAC_HMAC_SHA512_256, {0, 1}, "HMAC_SHA512_256",
+ &hmac_ops
+};
+
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512_224 = {
+ GCRY_MAC_HMAC_SHA512_224, {0, 1}, "HMAC_SHA512_224",
+ &hmac_ops
+};
+
+#endif
+#if USE_SHA3
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_224 = {
+ GCRY_MAC_HMAC_SHA3_224, {0, 1}, "HMAC_SHA3_224",
+ &hmac_ops
+};
+
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_256 = {
+ GCRY_MAC_HMAC_SHA3_256, {0, 1}, "HMAC_SHA3_256",
+ &hmac_ops
+};
+
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_384 = {
+ GCRY_MAC_HMAC_SHA3_384, {0, 1}, "HMAC_SHA3_384",
+ &hmac_ops
+};
+
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_512 = {
+ GCRY_MAC_HMAC_SHA3_512, {0, 1}, "HMAC_SHA3_512",
+ &hmac_ops
+};
+#endif
+#ifdef USE_GOST_R_3411_94
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_gost3411_94 = {
+ GCRY_MAC_HMAC_GOSTR3411_94, {0, 0}, "HMAC_GOSTR3411_94",
+ &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_gost3411_cp = {
+ GCRY_MAC_HMAC_GOSTR3411_CP, {0, 0}, "HMAC_GOSTR3411_CP",
+ &hmac_ops
+};
+#endif
+#ifdef USE_GOST_R_3411_12
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog256 = {
+ GCRY_MAC_HMAC_STRIBOG256, {0, 0}, "HMAC_STRIBOG256",
+ &hmac_ops
+};
+
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog512 = {
+ GCRY_MAC_HMAC_STRIBOG512, {0, 0}, "HMAC_STRIBOG512",
+ &hmac_ops
+};
+#endif
+#if USE_WHIRLPOOL
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_whirlpool = {
+ GCRY_MAC_HMAC_WHIRLPOOL, {0, 0}, "HMAC_WHIRLPOOL",
+ &hmac_ops
+};
+#endif
+#if USE_RMD160
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_rmd160 = {
+ GCRY_MAC_HMAC_RMD160, {0, 0}, "HMAC_RIPEMD160",
+ &hmac_ops
+};
+#endif
+#if USE_TIGER
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_tiger1 = {
+ GCRY_MAC_HMAC_TIGER1, {0, 0}, "HMAC_TIGER",
+ &hmac_ops
+};
+#endif
+#if USE_MD5
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_md5 = {
+ GCRY_MAC_HMAC_MD5, {0, 0}, "HMAC_MD5",
+ &hmac_ops
+};
+#endif
+#if USE_MD4
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_md4 = {
+ GCRY_MAC_HMAC_MD4, {0, 0}, "HMAC_MD4",
+ &hmac_ops
+};
+#endif
+#if USE_MD2
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_md2 = {
+ GCRY_MAC_HMAC_MD2, {0, 0}, "HMAC_MD2",
+ &hmac_ops
+};
+#endif
+#if USE_BLAKE2
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_512 = {
+ GCRY_MAC_HMAC_BLAKE2B_512, {0, 0}, "HMAC_BLAKE2B_512",
+ &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_384 = {
+ GCRY_MAC_HMAC_BLAKE2B_384, {0, 0}, "HMAC_BLAKE2B_384",
+ &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_256 = {
+ GCRY_MAC_HMAC_BLAKE2B_256, {0, 0}, "HMAC_BLAKE2B_256",
+ &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_160 = {
+ GCRY_MAC_HMAC_BLAKE2B_160, {0, 0}, "HMAC_BLAKE2B_160",
+ &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_256 = {
+ GCRY_MAC_HMAC_BLAKE2S_256, {0, 0}, "HMAC_BLAKE2S_256",
+ &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_224 = {
+ GCRY_MAC_HMAC_BLAKE2S_224, {0, 0}, "HMAC_BLAKE2S_224",
+ &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_160 = {
+ GCRY_MAC_HMAC_BLAKE2S_160, {0, 0}, "HMAC_BLAKE2S_160",
+ &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_128 = {
+ GCRY_MAC_HMAC_BLAKE2S_128, {0, 0}, "HMAC_BLAKE2S_128",
+ &hmac_ops
+};
+#endif
+#if USE_SM3
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_sm3 = {
+ GCRY_MAC_HMAC_SM3, {0, 0}, "HMAC_SM3",
+ &hmac_ops
+};
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/mac-internal.h b/comm/third_party/libgcrypt/cipher/mac-internal.h
new file mode 100644
index 0000000000..e49885beec
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/mac-internal.h
@@ -0,0 +1,275 @@
+/* mac-internal.h - Internal defs for mac.c
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "g10lib.h"
+#include "cipher-proto.h"
+#include "gost.h"
+
+
+/* The data object used to hold a handle to a MAC object. */
+struct gcry_mac_handle;
+
+/* The data object used to hold a Poly1305-MAC context. */
+struct poly1305mac_context_s;
+
+
+/*
+ *
+ * Message authentication code related definitions.
+ *
+ */
+
+
+/* Magic values for the context structure. */
+#define CTX_MAC_MAGIC_NORMAL 0x59d9b8af
+#define CTX_MAC_MAGIC_SECURE 0x12c27cd0
+
+
+/* MAC module functions. */
+typedef gcry_err_code_t (*gcry_mac_open_func_t)(gcry_mac_hd_t h);
+typedef void (*gcry_mac_close_func_t)(gcry_mac_hd_t h);
+typedef gcry_err_code_t (*gcry_mac_setkey_func_t)(gcry_mac_hd_t h,
+ const unsigned char *key,
+ size_t keylen);
+typedef gcry_err_code_t (*gcry_mac_setiv_func_t)(gcry_mac_hd_t h,
+ const unsigned char *iv,
+ size_t ivlen);
+typedef gcry_err_code_t (*gcry_mac_reset_func_t)(gcry_mac_hd_t h);
+typedef gcry_err_code_t (*gcry_mac_write_func_t)(gcry_mac_hd_t h,
+ const unsigned char *inbuf,
+ size_t inlen);
+typedef gcry_err_code_t (*gcry_mac_read_func_t)(gcry_mac_hd_t h,
+ unsigned char *outbuf,
+ size_t *outlen);
+typedef gcry_err_code_t (*gcry_mac_verify_func_t)(gcry_mac_hd_t h,
+ const unsigned char *inbuf,
+ size_t inlen);
+typedef unsigned int (*gcry_mac_get_maclen_func_t)(int algo);
+typedef unsigned int (*gcry_mac_get_keylen_func_t)(int algo);
+
+/* The type used to convey additional information to a MAC. */
+typedef gpg_err_code_t (*gcry_mac_set_extra_info_t)
+ (gcry_mac_hd_t h, int what, const void *buffer, size_t buflen);
+
+typedef struct gcry_mac_spec_ops
+{
+ gcry_mac_open_func_t open;
+ gcry_mac_close_func_t close;
+ gcry_mac_setkey_func_t setkey;
+ gcry_mac_setiv_func_t setiv;
+ gcry_mac_reset_func_t reset;
+ gcry_mac_write_func_t write;
+ gcry_mac_read_func_t read;
+ gcry_mac_verify_func_t verify;
+ gcry_mac_get_maclen_func_t get_maclen;
+ gcry_mac_get_keylen_func_t get_keylen;
+ gcry_mac_set_extra_info_t set_extra_info;
+ selftest_func_t selftest;
+} gcry_mac_spec_ops_t;
+
+
+/* Module specification structure for message authentication codes. */
+typedef struct gcry_mac_spec
+{
+ int algo;
+ struct {
+ unsigned int disabled:1;
+ unsigned int fips:1;
+ } flags;
+ const char *name;
+ const gcry_mac_spec_ops_t *ops;
+} gcry_mac_spec_t;
+
+/* The handle structure. */
+struct gcry_mac_handle
+{
+ int magic;
+ int algo;
+ const gcry_mac_spec_t *spec;
+ gcry_ctx_t gcry_ctx;
+ union {
+ struct {
+ gcry_md_hd_t md_ctx;
+ int md_algo;
+ } hmac;
+ struct {
+ gcry_cipher_hd_t ctx;
+ int cipher_algo;
+ unsigned int blklen;
+ } cmac;
+ struct {
+ gcry_cipher_hd_t ctx;
+ int cipher_algo;
+ } gmac;
+ struct {
+ struct poly1305mac_context_s *ctx;
+ } poly1305mac;
+ struct {
+ GOST28147_context ctx;
+ u32 n1, n2;
+ unsigned int unused;
+ unsigned int count;
+ unsigned char lastiv[8]; /* IMIT blocksize */
+ } imit;
+ } u;
+};
+
+
+/*
+ * The HMAC algorithm specifications (mac-hmac.c).
+ */
+#if USE_SHA1
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha1;
+#endif
+#if USE_SHA256
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha256;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha224;
+#endif
+#if USE_SHA512
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha384;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512_224;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512_256;
+#endif
+#if USE_SHA3
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_224;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_256;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_384;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_512;
+#endif
+#ifdef USE_GOST_R_3411_94
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_gost3411_94;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_gost3411_cp;
+#endif
+#ifdef USE_GOST_R_3411_12
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog256;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog512;
+#endif
+#if USE_WHIRLPOOL
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_whirlpool;
+#endif
+#if USE_RMD160
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_rmd160;
+#endif
+#if USE_TIGER
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_tiger1;
+#endif
+#if USE_MD5
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_md5;
+#endif
+#if USE_MD4
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_md4;
+#endif
+#if USE_BLAKE2
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_512;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_384;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_256;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_160;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_256;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_224;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_160;
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_128;
+#endif
+#if USE_SM3
+extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sm3;
+#endif
+
+/*
+ * The CMAC algorithm specifications (mac-cmac.c).
+ */
+#if USE_BLOWFISH
+extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_blowfish;
+#endif
+#if USE_DES
+extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_tripledes;
+#endif
+#if USE_CAST5
+extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_cast5;
+#endif
+#if USE_AES
+extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_aes;
+#endif
+#if USE_TWOFISH
+extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_twofish;
+#endif
+#if USE_SERPENT
+extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_serpent;
+#endif
+#if USE_RFC2268
+extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_rfc2268;
+#endif
+#if USE_SEED
+extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_seed;
+#endif
+#if USE_CAMELLIA
+extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_camellia;
+#endif
+#ifdef USE_IDEA
+extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_idea;
+#endif
+#if USE_GOST28147
+extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_gost28147;
+#endif
+#if USE_GOST28147
+extern gcry_mac_spec_t _gcry_mac_type_spec_gost28147_imit;
+#endif
+#if USE_SM4
+extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_sm4;
+#endif
+
+/*
+ * The GMAC algorithm specifications (mac-gmac.c).
+ */
+#if USE_AES
+extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_aes;
+#endif
+#if USE_TWOFISH
+extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_twofish;
+#endif
+#if USE_SERPENT
+extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_serpent;
+#endif
+#if USE_SEED
+extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_seed;
+#endif
+#if USE_CAMELLIA
+extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_camellia;
+#endif
+
+/*
+ * The Poly1305 MAC algorithm specifications (mac-poly1305.c).
+ */
+extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac;
+#if USE_AES
+extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_aes;
+#endif
+#if USE_CAMELLIA
+extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_camellia;
+#endif
+#if USE_TWOFISH
+extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_twofish;
+#endif
+#if USE_SERPENT
+extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_serpent;
+#endif
+#if USE_SEED
+extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_seed;
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/mac-poly1305.c b/comm/third_party/libgcrypt/cipher/mac-poly1305.c
new file mode 100644
index 0000000000..46ea735f89
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/mac-poly1305.c
@@ -0,0 +1,364 @@
+/* mac-poly1305.c - Poly1305 based MACs
+ * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mac-internal.h"
+#include "poly1305-internal.h"
+
+
+struct poly1305mac_context_s {
+ poly1305_context_t ctx;
+ gcry_cipher_hd_t hd;
+ struct {
+ unsigned int key_set:1;
+ unsigned int nonce_set:1;
+ unsigned int tag:1;
+ } marks;
+ byte tag[POLY1305_TAGLEN];
+ byte key[POLY1305_KEYLEN];
+};
+
+
+static gcry_err_code_t
+poly1305mac_open (gcry_mac_hd_t h)
+{
+ struct poly1305mac_context_s *mac_ctx;
+ int secure = (h->magic == CTX_MAC_MAGIC_SECURE);
+ unsigned int flags = (secure ? GCRY_CIPHER_SECURE : 0);
+ gcry_err_code_t err;
+ int cipher_algo;
+
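+  /* Allocate the context from secure memory when the handle was opened
+   * as a secure one. */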
+ if (secure)
+ mac_ctx = xtrycalloc_secure (1, sizeof(*mac_ctx));
+ else
+ mac_ctx = xtrycalloc (1, sizeof(*mac_ctx));
+
+ if (!mac_ctx)
+ return gpg_err_code_from_syserror ();
+
+ h->u.poly1305mac.ctx = mac_ctx;
+
+ switch (h->spec->algo)
+ {
+ default:
+ /* already checked. */
+ case GCRY_MAC_POLY1305:
+ /* plain Poly1305. */
+ cipher_algo = -1;
+ return 0;
+ case GCRY_MAC_POLY1305_AES:
+ cipher_algo = GCRY_CIPHER_AES;
+ break;
+ case GCRY_MAC_POLY1305_CAMELLIA:
+ cipher_algo = GCRY_CIPHER_CAMELLIA128;
+ break;
+ case GCRY_MAC_POLY1305_TWOFISH:
+ cipher_algo = GCRY_CIPHER_TWOFISH;
+ break;
+ case GCRY_MAC_POLY1305_SERPENT:
+ cipher_algo = GCRY_CIPHER_SERPENT128;
+ break;
+ case GCRY_MAC_POLY1305_SEED:
+ cipher_algo = GCRY_CIPHER_SEED;
+ break;
+ }
+
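+  /* The block cipher runs in ECB mode; it is only used to encrypt the
+   * nonce when deriving the second half of the Poly1305 key. */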
+ err = _gcry_cipher_open_internal (&mac_ctx->hd, cipher_algo,
+ GCRY_CIPHER_MODE_ECB, flags);
+ if (err)
+ goto err_free;
+
+ return 0;
+
+err_free:
+ xfree(h->u.poly1305mac.ctx);
+ return err;
+}
+
+
+static void
+poly1305mac_close (gcry_mac_hd_t h)
+{
+ struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+
+ if (h->spec->algo != GCRY_MAC_POLY1305)
+ _gcry_cipher_close (mac_ctx->hd);
+
+ xfree(mac_ctx);
+}
+
+
+static gcry_err_code_t
+poly1305mac_prepare_key (gcry_mac_hd_t h, const unsigned char *key, size_t keylen)
+{
+ struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+ size_t block_keylen = keylen - 16;
+
+ /* Need at least 16 + 1 byte key. */
+ if (keylen <= 16)
+ return GPG_ERR_INV_KEYLEN;
+
+  /* For Poly1305-AES and the other block-cipher variants, the last 16
+   * bytes of the supplied key form the first half of the Poly1305 key
+   * and are used as is. */
+  memcpy (mac_ctx->key, key + block_keylen, 16);
+
+  /* The leading part is used as the key for the block cipher. */
+  return _gcry_cipher_setkey (mac_ctx->hd, key, block_keylen);
+}
+
+
+static gcry_err_code_t
+poly1305mac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen)
+{
+ struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+ gcry_err_code_t err;
+
+ memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx));
+ memset(&mac_ctx->tag, 0, sizeof(mac_ctx->tag));
+ memset(&mac_ctx->key, 0, sizeof(mac_ctx->key));
+
+ mac_ctx->marks.key_set = 0;
+ mac_ctx->marks.nonce_set = 0;
+ mac_ctx->marks.tag = 0;
+
+ if (h->spec->algo != GCRY_MAC_POLY1305)
+ {
+ err = poly1305mac_prepare_key (h, key, keylen);
+ if (err)
+ return err;
+
+      /* Poly1305-AES and the other cipher variants also need a nonce. */
+ mac_ctx->marks.key_set = 1;
+ mac_ctx->marks.nonce_set = 0;
+ }
+ else
+ {
+      /* For plain Poly1305, the 32-byte key is used directly and no
+       * separate nonce is needed; setup is complete now. */
+
+ if (keylen != POLY1305_KEYLEN)
+ return GPG_ERR_INV_KEYLEN;
+
+ memcpy (mac_ctx->key, key, keylen);
+
+ err = _gcry_poly1305_init (&mac_ctx->ctx, mac_ctx->key, POLY1305_KEYLEN);
+ if (err)
+ {
+ memset(&mac_ctx->key, 0, sizeof(mac_ctx->key));
+ return err;
+ }
+
+ mac_ctx->marks.key_set = 1;
+ mac_ctx->marks.nonce_set = 1;
+ }
+
+ return 0;
+}
+
+
+static gcry_err_code_t
+poly1305mac_setiv (gcry_mac_hd_t h, const unsigned char *iv, size_t ivlen)
+{
+ struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+ gcry_err_code_t err;
+
+ if (h->spec->algo == GCRY_MAC_POLY1305)
+ return GPG_ERR_INV_ARG;
+
+ if (ivlen != 16)
+ return GPG_ERR_INV_ARG;
+
+ if (!mac_ctx->marks.key_set)
+ return 0;
+
+ memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx));
+ memset(&mac_ctx->tag, 0, sizeof(mac_ctx->tag));
+ mac_ctx->marks.nonce_set = 0;
+ mac_ctx->marks.tag = 0;
+
+  /* Prepare the second part of the Poly1305 key by encrypting the nonce
+   * with the block cipher. */
+
+ err = _gcry_cipher_encrypt (mac_ctx->hd, mac_ctx->key + 16, 16, iv, 16);
+ if (err)
+ return err;
+
+ err = _gcry_poly1305_init (&mac_ctx->ctx, mac_ctx->key, POLY1305_KEYLEN);
+ if (err)
+ return err;
+
+ mac_ctx->marks.nonce_set = 1;
+ return 0;
+}
+
+
+static gcry_err_code_t
+poly1305mac_reset (gcry_mac_hd_t h)
+{
+ struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+
+ if (!mac_ctx->marks.key_set || !mac_ctx->marks.nonce_set)
+ return GPG_ERR_INV_STATE;
+
+ memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx));
+ memset(&mac_ctx->tag, 0, sizeof(mac_ctx->tag));
+
+ mac_ctx->marks.key_set = 1;
+ mac_ctx->marks.nonce_set = 1;
+ mac_ctx->marks.tag = 0;
+
+ return _gcry_poly1305_init (&mac_ctx->ctx, mac_ctx->key, POLY1305_KEYLEN);
+}
+
+
+static gcry_err_code_t
+poly1305mac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+ struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+
+ if (!mac_ctx->marks.key_set || !mac_ctx->marks.nonce_set ||
+ mac_ctx->marks.tag)
+ return GPG_ERR_INV_STATE;
+
+ _gcry_poly1305_update (&mac_ctx->ctx, buf, buflen);
+ return 0;
+}
+
+
+static gcry_err_code_t
+poly1305mac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t *outlen)
+{
+ struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+
+ if (!mac_ctx->marks.key_set || !mac_ctx->marks.nonce_set)
+ return GPG_ERR_INV_STATE;
+
+ if (!mac_ctx->marks.tag)
+ {
+ _gcry_poly1305_finish(&mac_ctx->ctx, mac_ctx->tag);
+
+ memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx));
+ mac_ctx->marks.tag = 1;
+ }
+
+ if (*outlen == 0)
+ return 0;
+
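+  /* Allow the caller to request a truncated tag. */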
+ if (*outlen <= POLY1305_TAGLEN)
+ buf_cpy (outbuf, mac_ctx->tag, *outlen);
+ else
+ {
+ buf_cpy (outbuf, mac_ctx->tag, POLY1305_TAGLEN);
+ *outlen = POLY1305_TAGLEN;
+ }
+
+ return 0;
+}
+
+
+static gcry_err_code_t
+poly1305mac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+ struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+ gcry_err_code_t err;
+ size_t outlen = 0;
+
+ /* Check and finalize tag. */
+ err = poly1305mac_read(h, NULL, &outlen);
+ if (err)
+ return err;
+
+ if (buflen > POLY1305_TAGLEN)
+ return GPG_ERR_INV_LENGTH;
+
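+  /* Compare against the computed tag in constant time. */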
+ return buf_eq_const (buf, mac_ctx->tag, buflen) ? 0 : GPG_ERR_CHECKSUM;
+}
+
+
+static unsigned int
+poly1305mac_get_maclen (int algo)
+{
+ (void)algo;
+
+ return POLY1305_TAGLEN;
+}
+
+
+static unsigned int
+poly1305mac_get_keylen (int algo)
+{
+ (void)algo;
+
+ return POLY1305_KEYLEN;
+}
+
+
+static gcry_mac_spec_ops_t poly1305mac_ops = {
+ poly1305mac_open,
+ poly1305mac_close,
+ poly1305mac_setkey,
+ poly1305mac_setiv,
+ poly1305mac_reset,
+ poly1305mac_write,
+ poly1305mac_read,
+ poly1305mac_verify,
+ poly1305mac_get_maclen,
+ poly1305mac_get_keylen,
+ NULL,
+ NULL,
+};
+
+
+gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac = {
+ GCRY_MAC_POLY1305, {0, 0}, "POLY1305",
+ &poly1305mac_ops
+};
+#if USE_AES
+gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_aes = {
+ GCRY_MAC_POLY1305_AES, {0, 0}, "POLY1305_AES",
+ &poly1305mac_ops
+};
+#endif
+#if USE_CAMELLIA
+gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_camellia = {
+ GCRY_MAC_POLY1305_CAMELLIA, {0, 0}, "POLY1305_CAMELLIA",
+ &poly1305mac_ops
+};
+#endif
+#if USE_TWOFISH
+gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_twofish = {
+ GCRY_MAC_POLY1305_TWOFISH, {0, 0}, "POLY1305_TWOFISH",
+ &poly1305mac_ops
+};
+#endif
+#if USE_SERPENT
+gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_serpent = {
+ GCRY_MAC_POLY1305_SERPENT, {0, 0}, "POLY1305_SERPENT",
+ &poly1305mac_ops
+};
+#endif
+#if USE_SEED
+gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_seed = {
+ GCRY_MAC_POLY1305_SEED, {0, 0}, "POLY1305_SEED",
+ &poly1305mac_ops
+};
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/mac.c b/comm/third_party/libgcrypt/cipher/mac.c
new file mode 100644
index 0000000000..babe99e3a8
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/mac.c
@@ -0,0 +1,808 @@
+/* mac.c - message authentication code dispatcher
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mac-internal.h"
+
+
+/* This is the list of the MAC implementations included in
+   libgcrypt. */
+static gcry_mac_spec_t * const mac_list[] = {
+#if USE_SHA1
+ &_gcry_mac_type_spec_hmac_sha1,
+#endif
+#if USE_SHA256
+ &_gcry_mac_type_spec_hmac_sha256,
+ &_gcry_mac_type_spec_hmac_sha224,
+#endif
+#if USE_SHA512
+ &_gcry_mac_type_spec_hmac_sha512,
+ &_gcry_mac_type_spec_hmac_sha384,
+ &_gcry_mac_type_spec_hmac_sha512_256,
+ &_gcry_mac_type_spec_hmac_sha512_224,
+#endif
+#if USE_SHA3
+ &_gcry_mac_type_spec_hmac_sha3_224,
+ &_gcry_mac_type_spec_hmac_sha3_256,
+ &_gcry_mac_type_spec_hmac_sha3_384,
+ &_gcry_mac_type_spec_hmac_sha3_512,
+#endif
+#ifdef USE_GOST_R_3411_94
+ &_gcry_mac_type_spec_hmac_gost3411_94,
+ &_gcry_mac_type_spec_hmac_gost3411_cp,
+#endif
+#ifdef USE_GOST_R_3411_12
+ &_gcry_mac_type_spec_hmac_stribog256,
+ &_gcry_mac_type_spec_hmac_stribog512,
+#endif
+#if USE_WHIRLPOOL
+ &_gcry_mac_type_spec_hmac_whirlpool,
+#endif
+#if USE_RMD160
+ &_gcry_mac_type_spec_hmac_rmd160,
+#endif
+#if USE_TIGER
+ &_gcry_mac_type_spec_hmac_tiger1,
+#endif
+#if USE_MD5
+ &_gcry_mac_type_spec_hmac_md5,
+#endif
+#if USE_MD4
+ &_gcry_mac_type_spec_hmac_md4,
+#endif
+#if USE_BLAKE2
+ &_gcry_mac_type_spec_hmac_blake2b_512,
+ &_gcry_mac_type_spec_hmac_blake2b_384,
+ &_gcry_mac_type_spec_hmac_blake2b_256,
+ &_gcry_mac_type_spec_hmac_blake2b_160,
+ &_gcry_mac_type_spec_hmac_blake2s_256,
+ &_gcry_mac_type_spec_hmac_blake2s_224,
+ &_gcry_mac_type_spec_hmac_blake2s_160,
+ &_gcry_mac_type_spec_hmac_blake2s_128,
+#endif
+#if USE_SM3
+ &_gcry_mac_type_spec_hmac_sm3,
+#endif
+#if USE_BLOWFISH
+ &_gcry_mac_type_spec_cmac_blowfish,
+#endif
+#if USE_DES
+ &_gcry_mac_type_spec_cmac_tripledes,
+#endif
+#if USE_CAST5
+ &_gcry_mac_type_spec_cmac_cast5,
+#endif
+#if USE_AES
+ &_gcry_mac_type_spec_cmac_aes,
+ &_gcry_mac_type_spec_gmac_aes,
+ &_gcry_mac_type_spec_poly1305mac_aes,
+#endif
+#if USE_TWOFISH
+ &_gcry_mac_type_spec_cmac_twofish,
+ &_gcry_mac_type_spec_gmac_twofish,
+ &_gcry_mac_type_spec_poly1305mac_twofish,
+#endif
+#if USE_SERPENT
+ &_gcry_mac_type_spec_cmac_serpent,
+ &_gcry_mac_type_spec_gmac_serpent,
+ &_gcry_mac_type_spec_poly1305mac_serpent,
+#endif
+#if USE_RFC2268
+ &_gcry_mac_type_spec_cmac_rfc2268,
+#endif
+#if USE_SEED
+ &_gcry_mac_type_spec_cmac_seed,
+ &_gcry_mac_type_spec_gmac_seed,
+ &_gcry_mac_type_spec_poly1305mac_seed,
+#endif
+#if USE_CAMELLIA
+ &_gcry_mac_type_spec_cmac_camellia,
+ &_gcry_mac_type_spec_gmac_camellia,
+ &_gcry_mac_type_spec_poly1305mac_camellia,
+#endif
+#ifdef USE_IDEA
+ &_gcry_mac_type_spec_cmac_idea,
+#endif
+#if USE_GOST28147
+ &_gcry_mac_type_spec_cmac_gost28147,
+ &_gcry_mac_type_spec_gost28147_imit,
+#endif
+ &_gcry_mac_type_spec_poly1305mac,
+#if USE_SM4
+ &_gcry_mac_type_spec_cmac_sm4,
+#endif
+ NULL,
+};
+
+/* HMAC implementations start with index 101 (enum gcry_mac_algos) */
+static gcry_mac_spec_t * const mac_list_algo101[] =
+ {
+#if USE_SHA256
+ &_gcry_mac_type_spec_hmac_sha256,
+ &_gcry_mac_type_spec_hmac_sha224,
+#else
+ NULL,
+ NULL,
+#endif
+#if USE_SHA512
+ &_gcry_mac_type_spec_hmac_sha512,
+ &_gcry_mac_type_spec_hmac_sha384,
+#else
+ NULL,
+ NULL,
+#endif
+#if USE_SHA1
+ &_gcry_mac_type_spec_hmac_sha1,
+#else
+ NULL,
+#endif
+#if USE_MD5
+ &_gcry_mac_type_spec_hmac_md5,
+#else
+ NULL,
+#endif
+#if USE_MD4
+ &_gcry_mac_type_spec_hmac_md4,
+#else
+ NULL,
+#endif
+#if USE_RMD160
+ &_gcry_mac_type_spec_hmac_rmd160,
+#else
+ NULL,
+#endif
+#if USE_TIGER
+ &_gcry_mac_type_spec_hmac_tiger1,
+#else
+ NULL,
+#endif
+#if USE_WHIRLPOOL
+ &_gcry_mac_type_spec_hmac_whirlpool,
+#else
+ NULL,
+#endif
+#ifdef USE_GOST_R_3411_94
+ &_gcry_mac_type_spec_hmac_gost3411_94,
+#else
+ NULL,
+#endif
+#ifdef USE_GOST_R_3411_12
+ &_gcry_mac_type_spec_hmac_stribog256,
+ &_gcry_mac_type_spec_hmac_stribog512,
+#else
+ NULL,
+ NULL,
+#endif
+#if USE_MD2
+ &_gcry_mac_type_spec_hmac_md2,
+#else
+ NULL,
+#endif
+#if USE_SHA3
+ &_gcry_mac_type_spec_hmac_sha3_224,
+ &_gcry_mac_type_spec_hmac_sha3_256,
+ &_gcry_mac_type_spec_hmac_sha3_384,
+ &_gcry_mac_type_spec_hmac_sha3_512,
+#else
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+#endif
+#ifdef USE_GOST_R_3411_94
+ &_gcry_mac_type_spec_hmac_gost3411_cp,
+#else
+ NULL,
+#endif
+#if USE_BLAKE2
+ &_gcry_mac_type_spec_hmac_blake2b_512,
+ &_gcry_mac_type_spec_hmac_blake2b_384,
+ &_gcry_mac_type_spec_hmac_blake2b_256,
+ &_gcry_mac_type_spec_hmac_blake2b_160,
+ &_gcry_mac_type_spec_hmac_blake2s_256,
+ &_gcry_mac_type_spec_hmac_blake2s_224,
+ &_gcry_mac_type_spec_hmac_blake2s_160,
+ &_gcry_mac_type_spec_hmac_blake2s_128,
+#else
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+#endif
+#if USE_SM3
+ &_gcry_mac_type_spec_hmac_sm3,
+#else
+ NULL,
+#endif
+#if USE_SHA512
+ &_gcry_mac_type_spec_hmac_sha512_256,
+ &_gcry_mac_type_spec_hmac_sha512_224,
+#else
+ NULL,
+ NULL,
+#endif
+ };
+
+/* CMAC implementations start with index 201 (enum gcry_mac_algos) */
+static gcry_mac_spec_t * const mac_list_algo201[] =
+ {
+#if USE_AES
+ &_gcry_mac_type_spec_cmac_aes,
+#else
+ NULL,
+#endif
+#if USE_DES
+ &_gcry_mac_type_spec_cmac_tripledes,
+#else
+ NULL,
+#endif
+#if USE_CAMELLIA
+ &_gcry_mac_type_spec_cmac_camellia,
+#else
+ NULL,
+#endif
+#if USE_CAST5
+ &_gcry_mac_type_spec_cmac_cast5,
+#else
+ NULL,
+#endif
+#if USE_BLOWFISH
+ &_gcry_mac_type_spec_cmac_blowfish,
+#else
+ NULL,
+#endif
+#if USE_TWOFISH
+ &_gcry_mac_type_spec_cmac_twofish,
+#else
+ NULL,
+#endif
+#if USE_SERPENT
+ &_gcry_mac_type_spec_cmac_serpent,
+#else
+ NULL,
+#endif
+#if USE_SEED
+ &_gcry_mac_type_spec_cmac_seed,
+#else
+ NULL,
+#endif
+#if USE_RFC2268
+ &_gcry_mac_type_spec_cmac_rfc2268,
+#else
+ NULL,
+#endif
+#ifdef USE_IDEA
+ &_gcry_mac_type_spec_cmac_idea,
+#else
+ NULL,
+#endif
+#if USE_GOST28147
+ &_gcry_mac_type_spec_cmac_gost28147,
+#else
+ NULL,
+#endif
+#if USE_SM4
+ &_gcry_mac_type_spec_cmac_sm4
+#else
+ NULL
+#endif
+ };
+
+/* GMAC implementations start with index 401 (enum gcry_mac_algos) */
+static gcry_mac_spec_t * const mac_list_algo401[] =
+ {
+#if USE_AES
+ &_gcry_mac_type_spec_gmac_aes,
+#else
+ NULL,
+#endif
+#if USE_CAMELLIA
+ &_gcry_mac_type_spec_gmac_camellia,
+#else
+ NULL,
+#endif
+#if USE_TWOFISH
+ &_gcry_mac_type_spec_gmac_twofish,
+#else
+ NULL,
+#endif
+#if USE_SERPENT
+ &_gcry_mac_type_spec_gmac_serpent,
+#else
+ NULL,
+#endif
+#if USE_SEED
+ &_gcry_mac_type_spec_gmac_seed
+#else
+ NULL
+#endif
+ };
+
+/* Poly1305-MAC implementations start with index 501 (enum gcry_mac_algos) */
+static gcry_mac_spec_t * const mac_list_algo501[] =
+ {
+ &_gcry_mac_type_spec_poly1305mac,
+#if USE_AES
+ &_gcry_mac_type_spec_poly1305mac_aes,
+#else
+ NULL,
+#endif
+#if USE_CAMELLIA
+ &_gcry_mac_type_spec_poly1305mac_camellia,
+#else
+ NULL,
+#endif
+#if USE_TWOFISH
+ &_gcry_mac_type_spec_poly1305mac_twofish,
+#else
+ NULL,
+#endif
+#if USE_SERPENT
+ &_gcry_mac_type_spec_poly1305mac_serpent,
+#else
+ NULL,
+#endif
+#if USE_SEED
+ &_gcry_mac_type_spec_poly1305mac_seed
+#else
+ NULL
+#endif
+ };
+
+
+
+
+/* Explicitly initialize this module. */
+gcry_err_code_t
+_gcry_mac_init (void)
+{
+ if (fips_mode())
+ {
+      /* Disable algorithms that are not allowed in FIPS mode. */
+ int idx;
+ gcry_mac_spec_t *spec;
+
+ for (idx = 0; (spec = mac_list[idx]); idx++)
+ if (!spec->flags.fips)
+ spec->flags.disabled = 1;
+ }
+
+ return 0;
+}
+
+
+/* Return the spec structure for the MAC algorithm ALGO. For an
+ unknown algorithm NULL is returned. */
+static gcry_mac_spec_t *
+spec_from_algo (int algo)
+{
+ gcry_mac_spec_t *spec = NULL;
+
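+  /* Each MAC family occupies its own numeric range in enum gcry_mac_algos
+   * (HMAC 101..., CMAC 201..., GMAC 401..., Poly1305 501...). */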
+ if (algo >= 101 && algo < 101 + DIM(mac_list_algo101))
+ spec = mac_list_algo101[algo - 101];
+ else if (algo >= 201 && algo < 201 + DIM(mac_list_algo201))
+ spec = mac_list_algo201[algo - 201];
+ else if (algo >= 401 && algo < 401 + DIM(mac_list_algo401))
+ spec = mac_list_algo401[algo - 401];
+ else if (algo >= 501 && algo < 501 + DIM(mac_list_algo501))
+ spec = mac_list_algo501[algo - 501];
+#ifdef USE_GOST28147
+ else if (algo == GCRY_MAC_GOST28147_IMIT)
+ spec = &_gcry_mac_type_spec_gost28147_imit;
+#endif
+
+ if (spec)
+ gcry_assert (spec->algo == algo);
+
+ return spec;
+}
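
The ranged tables above turn this lookup into a constant-time index computation rather than a scan of mac_list. A minimal sketch of the same pattern, using a hypothetical table and function name that are illustration only, not part of the library:

    /* Illustration only: algorithm 201 maps to slot 0, 202 to slot 1, and
       so on; anything outside the range yields NULL, as in spec_from_algo.  */
    static const char *const cmac_names[] =
      { "CMAC-AES", "CMAC-3DES", "CMAC-CAMELLIA" };

    static const char *
    lookup_cmac_name (int algo)
    {
      if (algo >= 201
          && algo < 201 + (int) (sizeof cmac_names / sizeof cmac_names[0]))
        return cmac_names[algo - 201];
      return NULL;
    }
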
+
+
+/* Lookup a mac's spec by its name. */
+static gcry_mac_spec_t *
+spec_from_name (const char *name)
+{
+ gcry_mac_spec_t *spec;
+ int idx;
+
+ for (idx = 0; (spec = mac_list[idx]); idx++)
+ if (!stricmp (name, spec->name))
+ return spec;
+
+ return NULL;
+}
+
+
+/****************
+ * Map a string to the mac algo
+ */
+int
+_gcry_mac_map_name (const char *string)
+{
+ gcry_mac_spec_t *spec;
+
+ if (!string)
+ return 0;
+
+  /* Search for a matching MAC name. */
+ spec = spec_from_name (string);
+ if (spec)
+ return spec->algo;
+
+ return 0;
+}
+
+
+/****************
+ * This function simply returns the name of the algorithm or some constant
+ * string when there is no algo. It will never return NULL.
+ * Use the macro gcry_mac_test_algo() to check whether the algorithm
+ * is valid.
+ */
+const char *
+_gcry_mac_algo_name (int algorithm)
+{
+ gcry_mac_spec_t *spec;
+
+ spec = spec_from_algo (algorithm);
+ return spec ? spec->name : "?";
+}
+
+
+static gcry_err_code_t
+check_mac_algo (int algorithm)
+{
+ gcry_mac_spec_t *spec;
+
+ spec = spec_from_algo (algorithm);
+ if (spec && !spec->flags.disabled)
+ return 0;
+
+ return GPG_ERR_MAC_ALGO;
+}
+
+
+/****************
+ * Open a MAC handle for use with algorithm ALGO.
+ */
+static gcry_err_code_t
+mac_open (gcry_mac_hd_t * hd, int algo, int secure, gcry_ctx_t ctx)
+{
+ gcry_mac_spec_t *spec;
+ gcry_err_code_t err;
+ gcry_mac_hd_t h;
+
+ spec = spec_from_algo (algo);
+ if (!spec)
+ return GPG_ERR_MAC_ALGO;
+ else if (spec->flags.disabled)
+ return GPG_ERR_MAC_ALGO;
+ else if (!spec->ops)
+ return GPG_ERR_MAC_ALGO;
+ else if (!spec->ops->open || !spec->ops->write || !spec->ops->setkey ||
+ !spec->ops->read || !spec->ops->verify || !spec->ops->reset)
+ return GPG_ERR_MAC_ALGO;
+
+ if (secure)
+ h = xtrycalloc_secure (1, sizeof (*h));
+ else
+ h = xtrycalloc (1, sizeof (*h));
+
+ if (!h)
+ return gpg_err_code_from_syserror ();
+
+ h->magic = secure ? CTX_MAC_MAGIC_SECURE : CTX_MAC_MAGIC_NORMAL;
+ h->spec = spec;
+ h->algo = algo;
+ h->gcry_ctx = ctx;
+
+ err = h->spec->ops->open (h);
+ if (err)
+ xfree (h);
+ else
+ *hd = h;
+
+ return err;
+}
+
+
+static gcry_err_code_t
+mac_reset (gcry_mac_hd_t hd)
+{
+ if (hd->spec->ops->reset)
+ return hd->spec->ops->reset (hd);
+
+ return 0;
+}
+
+
+static void
+mac_close (gcry_mac_hd_t hd)
+{
+ if (hd->spec->ops->close)
+ hd->spec->ops->close (hd);
+
+ wipememory (hd, sizeof (*hd));
+
+ xfree (hd);
+}
+
+
+static gcry_err_code_t
+mac_setkey (gcry_mac_hd_t hd, const void *key, size_t keylen)
+{
+ if (!hd->spec->ops->setkey)
+ return GPG_ERR_INV_ARG;
+ if (keylen > 0 && !key)
+ return GPG_ERR_INV_ARG;
+
+ return hd->spec->ops->setkey (hd, key, keylen);
+}
+
+
+static gcry_err_code_t
+mac_setiv (gcry_mac_hd_t hd, const void *iv, size_t ivlen)
+{
+ if (!hd->spec->ops->setiv)
+ return GPG_ERR_INV_ARG;
+ if (ivlen > 0 && !iv)
+ return GPG_ERR_INV_ARG;
+
+ return hd->spec->ops->setiv (hd, iv, ivlen);
+}
+
+
+static gcry_err_code_t
+mac_write (gcry_mac_hd_t hd, const void *inbuf, size_t inlen)
+{
+ if (!hd->spec->ops->write)
+ return GPG_ERR_INV_ARG;
+ if (inlen > 0 && !inbuf)
+ return GPG_ERR_INV_ARG;
+
+ return hd->spec->ops->write (hd, inbuf, inlen);
+}
+
+
+static gcry_err_code_t
+mac_read (gcry_mac_hd_t hd, void *outbuf, size_t * outlen)
+{
+ if (!outbuf || !outlen || *outlen == 0 || !hd->spec->ops->read)
+ return GPG_ERR_INV_ARG;
+
+ return hd->spec->ops->read (hd, outbuf, outlen);
+}
+
+
+static gcry_err_code_t
+mac_verify (gcry_mac_hd_t hd, const void *buf, size_t buflen)
+{
+ if (!buf || buflen == 0 || !hd->spec->ops->verify)
+ return GPG_ERR_INV_ARG;
+
+ return hd->spec->ops->verify (hd, buf, buflen);
+}
+
+
+/* Create a MAC object for algorithm ALGO. FLAGS may be
+   given as a bitwise OR of the gcry_mac_flags values.
+ H is guaranteed to be a valid handle or NULL on error. */
+gpg_err_code_t
+_gcry_mac_open (gcry_mac_hd_t * h, int algo, unsigned int flags,
+ gcry_ctx_t ctx)
+{
+ gcry_err_code_t rc;
+ gcry_mac_hd_t hd = NULL;
+
+ if ((flags & ~GCRY_MAC_FLAG_SECURE))
+ rc = GPG_ERR_INV_ARG;
+ else
+ rc = mac_open (&hd, algo, !!(flags & GCRY_MAC_FLAG_SECURE), ctx);
+
+ *h = rc ? NULL : hd;
+ return rc;
+}
+
+
+void
+_gcry_mac_close (gcry_mac_hd_t hd)
+{
+ if (hd)
+ mac_close (hd);
+}
+
+
+gcry_err_code_t
+_gcry_mac_setkey (gcry_mac_hd_t hd, const void *key, size_t keylen)
+{
+ return mac_setkey (hd, key, keylen);
+}
+
+
+gcry_err_code_t
+_gcry_mac_setiv (gcry_mac_hd_t hd, const void *iv, size_t ivlen)
+{
+ return mac_setiv (hd, iv, ivlen);
+}
+
+
+gcry_err_code_t
+_gcry_mac_write (gcry_mac_hd_t hd, const void *inbuf, size_t inlen)
+{
+ return mac_write (hd, inbuf, inlen);
+}
+
+
+gcry_err_code_t
+_gcry_mac_read (gcry_mac_hd_t hd, void *outbuf, size_t * outlen)
+{
+ return mac_read (hd, outbuf, outlen);
+}
+
+
+gcry_err_code_t
+_gcry_mac_verify (gcry_mac_hd_t hd, const void *buf, size_t buflen)
+{
+ return mac_verify (hd, buf, buflen);
+}
+
+
+int
+_gcry_mac_get_algo (gcry_mac_hd_t hd)
+{
+ return hd->algo;
+}
+
+
+unsigned int
+_gcry_mac_get_algo_maclen (int algo)
+{
+ gcry_mac_spec_t *spec;
+
+ spec = spec_from_algo (algo);
+ if (!spec || !spec->ops || !spec->ops->get_maclen)
+ return 0;
+
+ return spec->ops->get_maclen (algo);
+}
+
+
+unsigned int
+_gcry_mac_get_algo_keylen (int algo)
+{
+ gcry_mac_spec_t *spec;
+
+ spec = spec_from_algo (algo);
+ if (!spec || !spec->ops || !spec->ops->get_keylen)
+ return 0;
+
+ return spec->ops->get_keylen (algo);
+}
+
+
+gcry_err_code_t
+_gcry_mac_ctl (gcry_mac_hd_t hd, int cmd, void *buffer, size_t buflen)
+{
+ gcry_err_code_t rc;
+
+  /* Not every command uses every argument; the casts silence warnings. */
+ (void) hd;
+ (void) buffer;
+ (void) buflen;
+
+ switch (cmd)
+ {
+ case GCRYCTL_RESET:
+ rc = mac_reset (hd);
+ break;
+ case GCRYCTL_SET_SBOX:
+ if (hd->spec->ops->set_extra_info)
+ rc = hd->spec->ops->set_extra_info
+ (hd, GCRYCTL_SET_SBOX, buffer, buflen);
+ else
+ rc = GPG_ERR_NOT_SUPPORTED;
+ break;
+ default:
+ rc = GPG_ERR_INV_OP;
+ }
+ return rc;
+}
+
+
+/* Return information about the given MAC algorithm ALGO.
+
+ GCRYCTL_TEST_ALGO:
+ Returns 0 if the specified algorithm ALGO is available for use.
+ BUFFER and NBYTES must be zero.
+
+ Note: Because this function is in most cases used to return an
+ integer value, we can make it easier for the caller to just look at
+ the return value. The caller will in all cases consult the value
+   and thereby detect whether an error occurred or not (e.g. while
+   checking the block size).
+ */
+gcry_err_code_t
+_gcry_mac_algo_info (int algo, int what, void *buffer, size_t * nbytes)
+{
+ gcry_err_code_t rc = 0;
+ unsigned int ui;
+
+ switch (what)
+ {
+ case GCRYCTL_GET_KEYLEN:
+ if (buffer || (!nbytes))
+ rc = GPG_ERR_INV_ARG;
+ else
+ {
+ ui = _gcry_mac_get_algo_keylen (algo);
+ if (ui > 0)
+ *nbytes = (size_t) ui;
+ else
+ /* The only reason for an error is an invalid algo. */
+ rc = GPG_ERR_MAC_ALGO;
+ }
+ break;
+ case GCRYCTL_TEST_ALGO:
+ if (buffer || nbytes)
+ rc = GPG_ERR_INV_ARG;
+ else
+ rc = check_mac_algo (algo);
+ break;
+
+ default:
+ rc = GPG_ERR_INV_OP;
+ }
+
+ return rc;
+}
+
+
+/* Run the self-tests for the MAC. */
+gpg_error_t
+_gcry_mac_selftest (int algo, int extended, selftest_report_func_t report)
+{
+ gcry_err_code_t ec;
+ gcry_mac_spec_t *spec;
+
+ spec = spec_from_algo (algo);
+ if (spec && !spec->flags.disabled && spec->ops && spec->ops->selftest)
+ ec = spec->ops->selftest (algo, extended, report);
+ else
+ {
+ ec = GPG_ERR_MAC_ALGO;
+ if (report)
+ report ("mac", algo, "module",
+ spec && !spec->flags.disabled?
+ "no selftest available" :
+ spec? "algorithm disabled" :
+ "algorithm not found");
+ }
+
+ return gpg_error (ec);
+}
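
Taken together, these dispatchers back the public gcry_mac_* interface. A hedged usage sketch (error checking omitted; the key and message below are placeholders):

    #include <gcrypt.h>

    void
    mac_example (void)
    {
      gcry_mac_hd_t hd;
      unsigned char tag[32];               /* HMAC-SHA-256 tag length */
      size_t taglen = sizeof tag;

      gcry_mac_open (&hd, GCRY_MAC_HMAC_SHA256, 0, NULL);
      gcry_mac_setkey (hd, "0123456789abcdef", 16);
      gcry_mac_write (hd, "hello", 5);
      gcry_mac_read (hd, tag, &taglen);    /* taglen becomes 32 */
      gcry_mac_close (hd);
    }
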
diff --git a/comm/third_party/libgcrypt/cipher/md.c b/comm/third_party/libgcrypt/cipher/md.c
new file mode 100644
index 0000000000..efb7376a1a
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/md.c
@@ -0,0 +1,1639 @@
+/* md.c - message digest dispatcher
+ * Copyright (C) 1998, 1999, 2002, 2003, 2006,
+ * 2008 Free Software Foundation, Inc.
+ * Copyright (C) 2013, 2014 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+
+
+/* This is the list of the digest implementations included in
+ libgcrypt. */
+static gcry_md_spec_t * const digest_list[] =
+ {
+#if USE_CRC
+ &_gcry_digest_spec_crc32,
+ &_gcry_digest_spec_crc32_rfc1510,
+ &_gcry_digest_spec_crc24_rfc2440,
+#endif
+#if USE_SHA1
+ &_gcry_digest_spec_sha1,
+#endif
+#if USE_SHA256
+ &_gcry_digest_spec_sha256,
+ &_gcry_digest_spec_sha224,
+#endif
+#if USE_SHA512
+ &_gcry_digest_spec_sha512,
+ &_gcry_digest_spec_sha384,
+ &_gcry_digest_spec_sha512_256,
+ &_gcry_digest_spec_sha512_224,
+#endif
+#if USE_SHA3
+ &_gcry_digest_spec_sha3_224,
+ &_gcry_digest_spec_sha3_256,
+ &_gcry_digest_spec_sha3_384,
+ &_gcry_digest_spec_sha3_512,
+ &_gcry_digest_spec_shake128,
+ &_gcry_digest_spec_shake256,
+#endif
+#if USE_GOST_R_3411_94
+ &_gcry_digest_spec_gost3411_94,
+ &_gcry_digest_spec_gost3411_cp,
+#endif
+#if USE_GOST_R_3411_12
+ &_gcry_digest_spec_stribog_256,
+ &_gcry_digest_spec_stribog_512,
+#endif
+#if USE_WHIRLPOOL
+ &_gcry_digest_spec_whirlpool,
+#endif
+#if USE_RMD160
+ &_gcry_digest_spec_rmd160,
+#endif
+#if USE_TIGER
+ &_gcry_digest_spec_tiger,
+ &_gcry_digest_spec_tiger1,
+ &_gcry_digest_spec_tiger2,
+#endif
+#if USE_MD5
+ &_gcry_digest_spec_md5,
+#endif
+#if USE_MD4
+ &_gcry_digest_spec_md4,
+#endif
+#if USE_MD2
+ &_gcry_digest_spec_md2,
+#endif
+#if USE_BLAKE2
+ &_gcry_digest_spec_blake2b_512,
+ &_gcry_digest_spec_blake2b_384,
+ &_gcry_digest_spec_blake2b_256,
+ &_gcry_digest_spec_blake2b_160,
+ &_gcry_digest_spec_blake2s_256,
+ &_gcry_digest_spec_blake2s_224,
+ &_gcry_digest_spec_blake2s_160,
+ &_gcry_digest_spec_blake2s_128,
+#endif
+#if USE_SM3
+ &_gcry_digest_spec_sm3,
+#endif
+ NULL
+ };
+
+/* Digest implementations starting with index 0 (enum gcry_md_algos) */
+static gcry_md_spec_t * const digest_list_algo0[] =
+ {
+ NULL, /* GCRY_MD_NONE */
+#if USE_MD5
+ &_gcry_digest_spec_md5,
+#else
+ NULL,
+#endif
+#if USE_SHA1
+ &_gcry_digest_spec_sha1,
+#else
+ NULL,
+#endif
+#if USE_RMD160
+ &_gcry_digest_spec_rmd160,
+#else
+ NULL,
+#endif
+ NULL, /* Unused index 4 */
+#if USE_MD2
+ &_gcry_digest_spec_md2,
+#else
+ NULL,
+#endif
+#if USE_TIGER
+ &_gcry_digest_spec_tiger,
+#else
+ NULL,
+#endif
+ NULL, /* GCRY_MD_HAVAL */
+#if USE_SHA256
+ &_gcry_digest_spec_sha256,
+#else
+ NULL,
+#endif
+#if USE_SHA512
+ &_gcry_digest_spec_sha384,
+ &_gcry_digest_spec_sha512,
+#else
+ NULL,
+ NULL,
+#endif
+#if USE_SHA256
+ &_gcry_digest_spec_sha224
+#else
+ NULL
+#endif
+ };
+
+/* Digest implementations starting with index 301 (enum gcry_md_algos) */
+static gcry_md_spec_t * const digest_list_algo301[] =
+ {
+#if USE_MD4
+ &_gcry_digest_spec_md4,
+#else
+ NULL,
+#endif
+#if USE_CRC
+ &_gcry_digest_spec_crc32,
+ &_gcry_digest_spec_crc32_rfc1510,
+ &_gcry_digest_spec_crc24_rfc2440,
+#else
+ NULL,
+ NULL,
+ NULL,
+#endif
+#if USE_WHIRLPOOL
+ &_gcry_digest_spec_whirlpool,
+#else
+ NULL,
+#endif
+#if USE_TIGER
+ &_gcry_digest_spec_tiger1,
+ &_gcry_digest_spec_tiger2,
+#else
+ NULL,
+ NULL,
+#endif
+#if USE_GOST_R_3411_94
+ &_gcry_digest_spec_gost3411_94,
+#else
+ NULL,
+#endif
+#if USE_GOST_R_3411_12
+ &_gcry_digest_spec_stribog_256,
+ &_gcry_digest_spec_stribog_512,
+#else
+ NULL,
+ NULL,
+#endif
+#if USE_GOST_R_3411_94
+ &_gcry_digest_spec_gost3411_cp,
+#else
+ NULL,
+#endif
+#if USE_SHA3
+ &_gcry_digest_spec_sha3_224,
+ &_gcry_digest_spec_sha3_256,
+ &_gcry_digest_spec_sha3_384,
+ &_gcry_digest_spec_sha3_512,
+ &_gcry_digest_spec_shake128,
+ &_gcry_digest_spec_shake256,
+#else
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+#endif
+#if USE_BLAKE2
+ &_gcry_digest_spec_blake2b_512,
+ &_gcry_digest_spec_blake2b_384,
+ &_gcry_digest_spec_blake2b_256,
+ &_gcry_digest_spec_blake2b_160,
+ &_gcry_digest_spec_blake2s_256,
+ &_gcry_digest_spec_blake2s_224,
+ &_gcry_digest_spec_blake2s_160,
+ &_gcry_digest_spec_blake2s_128,
+#else
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+#endif
+#if USE_SM3
+ &_gcry_digest_spec_sm3,
+#else
+ NULL,
+#endif
+#if USE_SHA512
+ &_gcry_digest_spec_sha512_256,
+ &_gcry_digest_spec_sha512_224,
+#else
+ NULL,
+ NULL,
+#endif
+ };
+
+
+typedef struct gcry_md_list
+{
+ gcry_md_spec_t *spec;
+ struct gcry_md_list *next;
+ size_t actual_struct_size; /* Allocated size of this structure. */
+ PROPERLY_ALIGNED_TYPE context[1];
+} GcryDigestEntry;
+
+/* This structure is put right after the gcry_md_hd_t buffer, so that
+ * only one memory block is needed. */
+struct gcry_md_context
+{
+ int magic;
+ size_t actual_handle_size; /* Allocated size of this handle. */
+ FILE *debug;
+ struct {
+ unsigned int secure:1;
+ unsigned int finalized:1;
+ unsigned int bugemu1:1;
+ unsigned int hmac:1;
+ } flags;
+ GcryDigestEntry *list;
+};
+
+
+#define CTX_MAGIC_NORMAL 0x11071961
+#define CTX_MAGIC_SECURE 0x16917011
+
+static gcry_err_code_t md_enable (gcry_md_hd_t hd, int algo);
+static void md_close (gcry_md_hd_t a);
+static void md_write (gcry_md_hd_t a, const void *inbuf, size_t inlen);
+static byte *md_read( gcry_md_hd_t a, int algo );
+static int md_get_algo( gcry_md_hd_t a );
+static int md_digest_length( int algo );
+static void md_start_debug ( gcry_md_hd_t a, const char *suffix );
+static void md_stop_debug ( gcry_md_hd_t a );
+
+
+
+static int
+map_algo (int algo)
+{
+ return algo;
+}
+
+
+/* Return the spec structure for the hash algorithm ALGO. For an
+ unknown algorithm NULL is returned. */
+static gcry_md_spec_t *
+spec_from_algo (int algo)
+{
+ gcry_md_spec_t *spec = NULL;
+
+ algo = map_algo (algo);
+
+ if (algo >= 0 && algo < DIM(digest_list_algo0))
+ spec = digest_list_algo0[algo];
+ else if (algo >= 301 && algo < 301 + DIM(digest_list_algo301))
+ spec = digest_list_algo301[algo - 301];
+
+ if (spec)
+ gcry_assert (spec->algo == algo);
+
+ return spec;
+}
+
+
+/* Lookup a hash's spec by its name. */
+static gcry_md_spec_t *
+spec_from_name (const char *name)
+{
+ gcry_md_spec_t *spec;
+ int idx;
+
+ for (idx=0; (spec = digest_list[idx]); idx++)
+ {
+ if (!stricmp (name, spec->name))
+ return spec;
+ }
+
+ return NULL;
+}
+
+
+/* Lookup a hash's spec by its OID. */
+static gcry_md_spec_t *
+spec_from_oid (const char *oid)
+{
+ gcry_md_spec_t *spec;
+ gcry_md_oid_spec_t *oid_specs;
+ int idx, j;
+
+ for (idx=0; (spec = digest_list[idx]); idx++)
+ {
+ oid_specs = spec->oids;
+ if (oid_specs)
+ {
+ for (j = 0; oid_specs[j].oidstring; j++)
+ if (!stricmp (oid, oid_specs[j].oidstring))
+ return spec;
+ }
+ }
+
+ return NULL;
+}
+
+
+static gcry_md_spec_t *
+search_oid (const char *oid, gcry_md_oid_spec_t *oid_spec)
+{
+ gcry_md_spec_t *spec;
+ int i;
+
+ if (!oid)
+ return NULL;
+
+ if (!strncmp (oid, "oid.", 4) || !strncmp (oid, "OID.", 4))
+ oid += 4;
+
+ spec = spec_from_oid (oid);
+ if (spec && spec->oids)
+ {
+ for (i = 0; spec->oids[i].oidstring; i++)
+ if (!stricmp (oid, spec->oids[i].oidstring))
+ {
+ if (oid_spec)
+ *oid_spec = spec->oids[i];
+ return spec;
+ }
+ }
+
+ return NULL;
+}
+
+
+/****************
+ * Map a string to the digest algo
+ */
+int
+_gcry_md_map_name (const char *string)
+{
+ gcry_md_spec_t *spec;
+
+ if (!string)
+ return 0;
+
+ /* If the string starts with a digit (optionally prefixed with
+ either "OID." or "oid."), we first look into our table of ASN.1
+ object identifiers to figure out the algorithm */
+ spec = search_oid (string, NULL);
+ if (spec)
+ return spec->algo;
+
+ /* Not found, search a matching digest name. */
+ spec = spec_from_name (string);
+ if (spec)
+ return spec->algo;
+
+ return 0;
+}
+
+
+/****************
+ * This function simply returns the name of the algorithm or some constant
+ * string when there is no algo. It will never return NULL.
+ * Use the macro gcry_md_test_algo() to check whether the algorithm
+ * is valid.
+ */
+const char *
+_gcry_md_algo_name (int algorithm)
+{
+ gcry_md_spec_t *spec;
+
+ spec = spec_from_algo (algorithm);
+ return spec ? spec->name : "?";
+}
+
+
+static gcry_err_code_t
+check_digest_algo (int algorithm)
+{
+ gcry_md_spec_t *spec;
+
+ spec = spec_from_algo (algorithm);
+ if (spec && !spec->flags.disabled)
+ return 0;
+
+ return GPG_ERR_DIGEST_ALGO;
+
+}
+
+
+/****************
+ * Open a message digest handle for use with algorithm ALGO.
+ * More algorithms may be added by md_enable(). The initial algorithm
+ * may be 0.
+ */
+static gcry_err_code_t
+md_open (gcry_md_hd_t *h, int algo, unsigned int flags)
+{
+ gcry_err_code_t err = 0;
+ int secure = !!(flags & GCRY_MD_FLAG_SECURE);
+ int hmac = !!(flags & GCRY_MD_FLAG_HMAC);
+ int bufsize = secure ? 512 : 1024;
+ struct gcry_md_context *ctx;
+ gcry_md_hd_t hd;
+ size_t n;
+
+  /* Allocate a memory area to hold the caller-visible buffer with its
+ * control information and the data required by this module. Set the
+ * context pointer at the beginning to this area.
+ * We have to use this strange scheme because we want to hide the
+ * internal data but have a variable sized buffer.
+ *
+ * +---+------+---........------+-------------+
+ * !ctx! bctl ! buffer ! private !
+ * +---+------+---........------+-------------+
+ * ! ^
+ * !---------------------------!
+ *
+ * We have to make sure that private is well aligned.
+ */
+ n = sizeof (struct gcry_md_handle) + bufsize;
+ n = ((n + sizeof (PROPERLY_ALIGNED_TYPE) - 1)
+ / sizeof (PROPERLY_ALIGNED_TYPE)) * sizeof (PROPERLY_ALIGNED_TYPE);
+
+ /* Allocate and set the Context pointer to the private data */
+ if (secure)
+ hd = xtrymalloc_secure (n + sizeof (struct gcry_md_context));
+ else
+ hd = xtrymalloc (n + sizeof (struct gcry_md_context));
+
+ if (! hd)
+ err = gpg_err_code_from_errno (errno);
+
+ if (! err)
+ {
+ hd->ctx = ctx = (void *) ((char *) hd + n);
+ /* Setup the globally visible data (bctl in the diagram).*/
+ hd->bufsize = n - sizeof (struct gcry_md_handle) + 1;
+ hd->bufpos = 0;
+
+ /* Initialize the private data. */
+ memset (hd->ctx, 0, sizeof *hd->ctx);
+ ctx->magic = secure ? CTX_MAGIC_SECURE : CTX_MAGIC_NORMAL;
+ ctx->actual_handle_size = n + sizeof (struct gcry_md_context);
+ ctx->flags.secure = secure;
+ ctx->flags.hmac = hmac;
+ ctx->flags.bugemu1 = !!(flags & GCRY_MD_FLAG_BUGEMU1);
+ }
+
+ if (! err)
+ {
+ /* Hmmm, should we really do that? - yes [-wk] */
+ _gcry_fast_random_poll ();
+
+ if (algo)
+ {
+ err = md_enable (hd, algo);
+ if (err)
+ md_close (hd);
+ }
+ }
+
+ if (! err)
+ *h = hd;
+
+ return err;
+}
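
The size computation above rounds n up to a multiple of sizeof (PROPERLY_ALIGNED_TYPE) so that the private context placed after the buffer stays aligned. The round-up idiom in isolation, with made-up numbers only:

    /* Round n up to the next multiple of align; e.g. n = 1034 with
       align = 8 becomes 1040.  */
    size_t
    round_up (size_t n, size_t align)
    {
      return ((n + align - 1) / align) * align;
    }
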
+
+/* Create a message digest object for algorithm ALGO. FLAGS may be
+   given as a bitwise OR of the gcry_md_flags values.  ALGO may be
+ given as 0 if the algorithms to be used are later set using
+ gcry_md_enable. H is guaranteed to be a valid handle or NULL on
+ error. */
+gcry_err_code_t
+_gcry_md_open (gcry_md_hd_t *h, int algo, unsigned int flags)
+{
+ gcry_err_code_t rc;
+ gcry_md_hd_t hd;
+
+ if ((flags & ~(GCRY_MD_FLAG_SECURE
+ | GCRY_MD_FLAG_HMAC
+ | GCRY_MD_FLAG_BUGEMU1)))
+ rc = GPG_ERR_INV_ARG;
+ else
+ rc = md_open (&hd, algo, flags);
+
+ *h = rc? NULL : hd;
+ return rc;
+}
+
+
+
+static gcry_err_code_t
+md_enable (gcry_md_hd_t hd, int algorithm)
+{
+ struct gcry_md_context *h = hd->ctx;
+ gcry_md_spec_t *spec;
+ GcryDigestEntry *entry;
+ gcry_err_code_t err = 0;
+
+ for (entry = h->list; entry; entry = entry->next)
+ if (entry->spec->algo == algorithm)
+ return 0; /* Already enabled */
+
+ spec = spec_from_algo (algorithm);
+ if (!spec)
+ {
+ log_debug ("md_enable: algorithm %d not available\n", algorithm);
+ err = GPG_ERR_DIGEST_ALGO;
+ }
+
+
+ if (!err && algorithm == GCRY_MD_MD5 && fips_mode ())
+ {
+ _gcry_inactivate_fips_mode ("MD5 used");
+ if (_gcry_enforced_fips_mode () )
+ {
+ /* We should never get to here because we do not register
+ MD5 in enforced fips mode. But better throw an error. */
+ err = GPG_ERR_DIGEST_ALGO;
+ }
+ }
+
+ if (!err && h->flags.hmac && spec->read == NULL)
+ {
+ /* Expandable output function cannot act as part of HMAC. */
+ err = GPG_ERR_DIGEST_ALGO;
+ }
+
+ if (!err)
+ {
+ size_t size = (sizeof (*entry)
+ + spec->contextsize * (h->flags.hmac? 3 : 1)
+ - sizeof (entry->context));
+
+ /* And allocate a new list entry. */
+ if (h->flags.secure)
+ entry = xtrymalloc_secure (size);
+ else
+ entry = xtrymalloc (size);
+
+ if (! entry)
+ err = gpg_err_code_from_errno (errno);
+ else
+ {
+ entry->spec = spec;
+ entry->next = h->list;
+ entry->actual_struct_size = size;
+ h->list = entry;
+
+ /* And init this instance. */
+ entry->spec->init (entry->context,
+ h->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0);
+ }
+ }
+
+ return err;
+}
+
+
+gcry_err_code_t
+_gcry_md_enable (gcry_md_hd_t hd, int algorithm)
+{
+ return md_enable (hd, algorithm);
+}
+
+
+static gcry_err_code_t
+md_copy (gcry_md_hd_t ahd, gcry_md_hd_t *b_hd)
+{
+ gcry_err_code_t err = 0;
+ struct gcry_md_context *a = ahd->ctx;
+ struct gcry_md_context *b;
+ GcryDigestEntry *ar, *br;
+ gcry_md_hd_t bhd;
+ size_t n;
+
+ if (ahd->bufpos)
+ md_write (ahd, NULL, 0);
+
+ n = (char *) ahd->ctx - (char *) ahd;
+ if (a->flags.secure)
+ bhd = xtrymalloc_secure (n + sizeof (struct gcry_md_context));
+ else
+ bhd = xtrymalloc (n + sizeof (struct gcry_md_context));
+
+ if (!bhd)
+ {
+ err = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+
+ bhd->ctx = b = (void *) ((char *) bhd + n);
+ /* No need to copy the buffer due to the write above. */
+ gcry_assert (ahd->bufsize == (n - sizeof (struct gcry_md_handle) + 1));
+ bhd->bufsize = ahd->bufsize;
+ bhd->bufpos = 0;
+ gcry_assert (! ahd->bufpos);
+ memcpy (b, a, sizeof *a);
+ b->list = NULL;
+ b->debug = NULL;
+
+ /* Copy the complete list of algorithms. The copied list is
+ reversed, but that doesn't matter. */
+ for (ar = a->list; ar; ar = ar->next)
+ {
+ if (a->flags.secure)
+ br = xtrymalloc_secure (ar->actual_struct_size);
+ else
+ br = xtrymalloc (ar->actual_struct_size);
+ if (!br)
+ {
+ err = gpg_err_code_from_syserror ();
+ md_close (bhd);
+ goto leave;
+ }
+
+ memcpy (br, ar, ar->actual_struct_size);
+ br->next = b->list;
+ b->list = br;
+ }
+
+ if (a->debug)
+ md_start_debug (bhd, "unknown");
+
+ *b_hd = bhd;
+
+ leave:
+ return err;
+}
+
+
+gcry_err_code_t
+_gcry_md_copy (gcry_md_hd_t *handle, gcry_md_hd_t hd)
+{
+ gcry_err_code_t rc;
+
+ rc = md_copy (hd, handle);
+ if (rc)
+ *handle = NULL;
+ return rc;
+}
+
+
+/*
+ * Reset all contexts and discard any buffered stuff. This may be used
+ * instead of calling md_close() followed by md_open().
+ */
+void
+_gcry_md_reset (gcry_md_hd_t a)
+{
+ GcryDigestEntry *r;
+
+ /* Note: We allow this even in fips non operational mode. */
+
+ a->bufpos = a->ctx->flags.finalized = 0;
+
+ if (a->ctx->flags.hmac)
+ for (r = a->ctx->list; r; r = r->next)
+ {
+ memcpy (r->context, (char *)r->context + r->spec->contextsize,
+ r->spec->contextsize);
+ }
+ else
+ for (r = a->ctx->list; r; r = r->next)
+ {
+ memset (r->context, 0, r->spec->contextsize);
+ (*r->spec->init) (r->context,
+ a->ctx->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0);
+ }
+}
+
+
+static void
+md_close (gcry_md_hd_t a)
+{
+ GcryDigestEntry *r, *r2;
+
+ if (! a)
+ return;
+ if (a->ctx->debug)
+ md_stop_debug (a);
+ for (r = a->ctx->list; r; r = r2)
+ {
+ r2 = r->next;
+ wipememory (r, r->actual_struct_size);
+ xfree (r);
+ }
+
+ wipememory (a, a->ctx->actual_handle_size);
+ xfree(a);
+}
+
+
+void
+_gcry_md_close (gcry_md_hd_t hd)
+{
+ /* Note: We allow this even in fips non operational mode. */
+ md_close (hd);
+}
+
+
+static void
+md_write (gcry_md_hd_t a, const void *inbuf, size_t inlen)
+{
+ GcryDigestEntry *r;
+
+ if (a->ctx->debug)
+ {
+ if (a->bufpos && fwrite (a->buf, a->bufpos, 1, a->ctx->debug) != 1)
+ BUG();
+ if (inlen && fwrite (inbuf, inlen, 1, a->ctx->debug) != 1)
+ BUG();
+ }
+
+ for (r = a->ctx->list; r; r = r->next)
+ {
+ if (a->bufpos)
+ (*r->spec->write) (r->context, a->buf, a->bufpos);
+ (*r->spec->write) (r->context, inbuf, inlen);
+ }
+ a->bufpos = 0;
+}
+
+
+/* Note that this function may be used after finalize and read to keep
+   on writing to the transform function so as to mitigate timing
+ attacks. */
+void
+_gcry_md_write (gcry_md_hd_t hd, const void *inbuf, size_t inlen)
+{
+ md_write (hd, inbuf, inlen);
+}
+
+
+static void
+md_final (gcry_md_hd_t a)
+{
+ GcryDigestEntry *r;
+
+ if (a->ctx->flags.finalized)
+ return;
+
+ if (a->bufpos)
+ md_write (a, NULL, 0);
+
+ for (r = a->ctx->list; r; r = r->next)
+ (*r->spec->final) (r->context);
+
+ a->ctx->flags.finalized = 1;
+
+ if (!a->ctx->flags.hmac)
+ return;
+
+ for (r = a->ctx->list; r; r = r->next)
+ {
+ byte *p;
+ size_t dlen = r->spec->mdlen;
+ byte *hash;
+ gcry_err_code_t err;
+
+ if (r->spec->read == NULL)
+ continue;
+
+ p = r->spec->read (r->context);
+
+ if (a->ctx->flags.secure)
+ hash = xtrymalloc_secure (dlen);
+ else
+ hash = xtrymalloc (dlen);
+ if (!hash)
+ {
+ err = gpg_err_code_from_errno (errno);
+ _gcry_fatal_error (err, NULL);
+ }
+
+ memcpy (hash, p, dlen);
+ memcpy (r->context, (char *)r->context + r->spec->contextsize * 2,
+ r->spec->contextsize);
+ (*r->spec->write) (r->context, hash, dlen);
+ (*r->spec->final) (r->context);
+ xfree (hash);
+ }
+}
+
+
+static gcry_err_code_t
+md_setkey (gcry_md_hd_t h, const unsigned char *key, size_t keylen)
+{
+ gcry_err_code_t rc = 0;
+ GcryDigestEntry *r;
+ int algo_had_setkey = 0;
+
+ if (!h->ctx->list)
+ return GPG_ERR_DIGEST_ALGO; /* Might happen if no algo is enabled. */
+
+ if (h->ctx->flags.hmac)
+ return GPG_ERR_DIGEST_ALGO; /* Tried md_setkey for HMAC md. */
+
+ for (r = h->ctx->list; r; r = r->next)
+ {
+ switch (r->spec->algo)
+ {
+#if USE_BLAKE2
+ /* TODO? add spec->init_with_key? */
+ case GCRY_MD_BLAKE2B_512:
+ case GCRY_MD_BLAKE2B_384:
+ case GCRY_MD_BLAKE2B_256:
+ case GCRY_MD_BLAKE2B_160:
+ case GCRY_MD_BLAKE2S_256:
+ case GCRY_MD_BLAKE2S_224:
+ case GCRY_MD_BLAKE2S_160:
+ case GCRY_MD_BLAKE2S_128:
+ algo_had_setkey = 1;
+ memset (r->context, 0, r->spec->contextsize);
+ rc = _gcry_blake2_init_with_key (r->context,
+ h->ctx->flags.bugemu1
+ ? GCRY_MD_FLAG_BUGEMU1:0,
+ key, keylen, r->spec->algo);
+ break;
+#endif
+ default:
+ rc = GPG_ERR_DIGEST_ALGO;
+ break;
+ }
+
+ if (rc)
+ break;
+ }
+
+ if (rc && !algo_had_setkey)
+ {
+      /* None of the algorithms had a setkey implementation, so the
+       * contexts were not modified.  Just return the error. */
+ return rc;
+ }
+ else if (rc && algo_had_setkey)
+ {
+      /* Some of the contexts have been modified, but an error occurred.
+       * Reset all contexts. */
+ _gcry_md_reset (h);
+ return rc;
+ }
+
+ /* Successful md_setkey implies reset. */
+ h->bufpos = h->ctx->flags.finalized = 0;
+
+ return 0;
+}
+
+
+static gcry_err_code_t
+prepare_macpads (gcry_md_hd_t a, const unsigned char *key, size_t keylen)
+{
+ GcryDigestEntry *r;
+
+ if (!a->ctx->list)
+ return GPG_ERR_DIGEST_ALGO; /* Might happen if no algo is enabled. */
+
+ if (!a->ctx->flags.hmac)
+ return GPG_ERR_DIGEST_ALGO; /* Tried prepare_macpads for non-HMAC md. */
+
+ for (r = a->ctx->list; r; r = r->next)
+ {
+ const unsigned char *k;
+ size_t k_len;
+ unsigned char *key_allocated = NULL;
+ int macpad_Bsize;
+ int i;
+
+ switch (r->spec->algo)
+ {
+ /* TODO: add spec->blocksize */
+ case GCRY_MD_SHA3_224:
+ macpad_Bsize = 1152 / 8;
+ break;
+ case GCRY_MD_SHA3_256:
+ macpad_Bsize = 1088 / 8;
+ break;
+ case GCRY_MD_SHA3_384:
+ macpad_Bsize = 832 / 8;
+ break;
+ case GCRY_MD_SHA3_512:
+ macpad_Bsize = 576 / 8;
+ break;
+ case GCRY_MD_SHA384:
+ case GCRY_MD_SHA512:
+ case GCRY_MD_SHA512_256:
+ case GCRY_MD_SHA512_224:
+ case GCRY_MD_BLAKE2B_512:
+ case GCRY_MD_BLAKE2B_384:
+ case GCRY_MD_BLAKE2B_256:
+ case GCRY_MD_BLAKE2B_160:
+ macpad_Bsize = 128;
+ break;
+ case GCRY_MD_GOSTR3411_94:
+ case GCRY_MD_GOSTR3411_CP:
+ macpad_Bsize = 32;
+ break;
+ default:
+ macpad_Bsize = 64;
+ break;
+ }
+
+ if ( keylen > macpad_Bsize )
+ {
+ k = key_allocated = xtrymalloc_secure (r->spec->mdlen);
+ if (!k)
+ return gpg_err_code_from_errno (errno);
+ _gcry_md_hash_buffer (r->spec->algo, key_allocated, key, keylen);
+ k_len = r->spec->mdlen;
+ gcry_assert ( k_len <= macpad_Bsize );
+ }
+ else
+ {
+ k = key;
+ k_len = keylen;
+ }
+
+ (*r->spec->init) (r->context,
+ a->ctx->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0);
+ a->bufpos = 0;
+ for (i=0; i < k_len; i++ )
+ _gcry_md_putc (a, k[i] ^ 0x36);
+ for (; i < macpad_Bsize; i++ )
+ _gcry_md_putc (a, 0x36);
+ (*r->spec->write) (r->context, a->buf, a->bufpos);
+ memcpy ((char *)r->context + r->spec->contextsize, r->context,
+ r->spec->contextsize);
+
+ (*r->spec->init) (r->context,
+ a->ctx->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0);
+ a->bufpos = 0;
+ for (i=0; i < k_len; i++ )
+ _gcry_md_putc (a, k[i] ^ 0x5c);
+ for (; i < macpad_Bsize; i++ )
+ _gcry_md_putc (a, 0x5c);
+ (*r->spec->write) (r->context, a->buf, a->bufpos);
+ memcpy ((char *)r->context + r->spec->contextsize*2, r->context,
+ r->spec->contextsize);
+
+ xfree (key_allocated);
+ }
+
+ a->bufpos = 0;
+ return 0;
+}
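
This is the standard HMAC key schedule: a key longer than the block size is hashed first, the result is zero-padded to the block size, and the padded key K' is XORed with 0x36 (ipad) and 0x5c (opad) to seed the inner and outer states, giving HMAC(K, m) = H((K' ^ opad) || H((K' ^ ipad) || m)). A self-contained sketch of the same construction for SHA-256 (block size 64, digest length 32), written against the public API rather than the internal contexts used above:

    #include <string.h>
    #include <gcrypt.h>

    /* Sketch: HMAC-SHA-256 assembled by hand, mirroring what
       prepare_macpads precomputes.  Error checks omitted.  */
    static void
    hmac_sha256_sketch (const void *key, size_t keylen,
                        const void *msg, size_t msglen, unsigned char out[32])
    {
      unsigned char k[64] = { 0 }, pad[64], inner[32];
      gcry_md_hd_t hd;
      size_t i;

      if (keylen > 64)
        gcry_md_hash_buffer (GCRY_MD_SHA256, k, key, keylen); /* hash long keys */
      else
        memcpy (k, key, keylen);                              /* zero-pad short keys */

      /* inner = H((K' ^ ipad) || msg), ipad byte = 0x36 */
      gcry_md_open (&hd, GCRY_MD_SHA256, 0);
      for (i = 0; i < 64; i++)
        pad[i] = k[i] ^ 0x36;
      gcry_md_write (hd, pad, 64);
      gcry_md_write (hd, msg, msglen);
      memcpy (inner, gcry_md_read (hd, GCRY_MD_SHA256), 32);
      gcry_md_close (hd);

      /* out = H((K' ^ opad) || inner), opad byte = 0x5c */
      gcry_md_open (&hd, GCRY_MD_SHA256, 0);
      for (i = 0; i < 64; i++)
        pad[i] = k[i] ^ 0x5c;
      gcry_md_write (hd, pad, 64);
      gcry_md_write (hd, inner, 32);
      memcpy (out, gcry_md_read (hd, GCRY_MD_SHA256), 32);
      gcry_md_close (hd);
    }

Unlike this sketch, prepare_macpads hashes the two pads only once and caches the resulting partial states in the extra context copies, so later resets and finalizations do not repeat that work.
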
+
+
+gcry_err_code_t
+_gcry_md_ctl (gcry_md_hd_t hd, int cmd, void *buffer, size_t buflen)
+{
+ gcry_err_code_t rc = 0;
+
+ (void)buflen; /* Currently not used. */
+
+ switch (cmd)
+ {
+ case GCRYCTL_FINALIZE:
+ md_final (hd);
+ break;
+ case GCRYCTL_START_DUMP:
+ md_start_debug (hd, buffer);
+ break;
+ case GCRYCTL_STOP_DUMP:
+ md_stop_debug ( hd );
+ break;
+ default:
+ rc = GPG_ERR_INV_OP;
+ }
+ return rc;
+}
+
+
+gcry_err_code_t
+_gcry_md_setkey (gcry_md_hd_t hd, const void *key, size_t keylen)
+{
+ gcry_err_code_t rc;
+
+ if (hd->ctx->flags.hmac)
+ {
+ rc = prepare_macpads (hd, key, keylen);
+ if (!rc)
+ _gcry_md_reset (hd);
+ }
+ else
+ {
+ rc = md_setkey (hd, key, keylen);
+ }
+
+ return rc;
+}
+
+
+/* The new debug interface.  If SUFFIX is a string it creates a debug
+   file for the context HD.  If SUFFIX is NULL, the file is closed and
+ debugging is stopped. */
+void
+_gcry_md_debug (gcry_md_hd_t hd, const char *suffix)
+{
+ if (suffix)
+ md_start_debug (hd, suffix);
+ else
+ md_stop_debug (hd);
+}
+
+
+/****************
+ * If ALGO is null, return the digest of the enabled algorithm (there
+ * should be only one).
+ */
+static byte *
+md_read( gcry_md_hd_t a, int algo )
+{
+ GcryDigestEntry *r = a->ctx->list;
+
+ if (! algo)
+ {
+ /* Return the first algorithm */
+ if (r)
+ {
+ if (r->next)
+ log_debug ("more than one algorithm in md_read(0)\n");
+ if (r->spec->read)
+ return r->spec->read (r->context);
+ }
+ }
+ else
+ {
+ for (r = a->ctx->list; r; r = r->next)
+ if (r->spec->algo == algo)
+ {
+ if (r->spec->read)
+ return r->spec->read (r->context);
+ break;
+ }
+ }
+
+ if (r && !r->spec->read)
+ _gcry_fatal_error (GPG_ERR_DIGEST_ALGO,
+ "requested algo has no fixed digest length");
+ else
+ _gcry_fatal_error (GPG_ERR_DIGEST_ALGO, "requested algo not in md context");
+ return NULL;
+}
+
+
+/*
+ * Read out the complete digest; this function implicitly finalizes
+ * the hash.
+ */
+byte *
+_gcry_md_read (gcry_md_hd_t hd, int algo)
+{
+ /* This function is expected to always return a digest, thus we
+ can't return an error which we actually should do in
+ non-operational state. */
+ _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0);
+ return md_read (hd, algo);
+}
+
+
+/****************
+ * If ALGO is null, extract output from the enabled algorithm (there
+ * should be only one).
+ */
+static gcry_err_code_t
+md_extract(gcry_md_hd_t a, int algo, void *out, size_t outlen)
+{
+ GcryDigestEntry *r = a->ctx->list;
+
+ if (!algo)
+ {
+ /* Return the first algorithm */
+ if (r && r->spec->extract)
+ {
+ if (r->next)
+ log_debug ("more than one algorithm in md_extract(0)\n");
+ r->spec->extract (r->context, out, outlen);
+ return 0;
+ }
+ }
+ else
+ {
+ for (r = a->ctx->list; r; r = r->next)
+ if (r->spec->algo == algo && r->spec->extract)
+ {
+ r->spec->extract (r->context, out, outlen);
+ return 0;
+ }
+ }
+
+ return GPG_ERR_DIGEST_ALGO;
+}
+
+
+/*
+ * Expand the output of an XOF-class digest; this function implicitly finalizes
+ * the hash.
+ */
+gcry_err_code_t
+_gcry_md_extract (gcry_md_hd_t hd, int algo, void *out, size_t outlen)
+{
+ _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0);
+ return md_extract (hd, algo, out, outlen);
+}
+
+
+/*
+ * Read out an intermediate digest. Not yet functional.
+ */
+gcry_err_code_t
+_gcry_md_get (gcry_md_hd_t hd, int algo, byte *buffer, int buflen)
+{
+ (void)hd;
+ (void)algo;
+ (void)buffer;
+ (void)buflen;
+
+ /*md_digest ... */
+ fips_signal_error ("unimplemented function called");
+ return GPG_ERR_INTERNAL;
+}
+
+
+/*
+ * Shortcut function to hash a buffer with a given algo. The only
+ * guaranteed supported algorithms are RIPE-MD160 and SHA-1. The
+ * supplied digest buffer must be large enough to store the resulting
+ * hash.  No error is returned; the function will abort on an invalid
+ * algo. DISABLED_ALGOS are ignored here. */
+void
+_gcry_md_hash_buffer (int algo, void *digest,
+ const void *buffer, size_t length)
+{
+ gcry_md_spec_t *spec;
+
+ spec = spec_from_algo (algo);
+ if (!spec)
+ {
+ log_debug ("md_hash_buffer: algorithm %d not available\n", algo);
+ return;
+ }
+
+ if (algo == GCRY_MD_MD5 && fips_mode ())
+ {
+ _gcry_inactivate_fips_mode ("MD5 used");
+ if (_gcry_enforced_fips_mode () )
+ {
+ /* We should never get to here because we do not register
+ MD5 in enforced fips mode. */
+ _gcry_fips_noreturn ();
+ }
+ }
+
+ if (spec->hash_buffer != NULL)
+ {
+ spec->hash_buffer (digest, buffer, length);
+ }
+ else if (spec->hash_buffers != NULL)
+ {
+ gcry_buffer_t iov;
+
+ iov.size = 0;
+ iov.data = (void *)buffer;
+ iov.off = 0;
+ iov.len = length;
+
+ spec->hash_buffers (digest, &iov, 1);
+ }
+ else
+ {
+ /* For the others we do not have a fast function, so we use the
+ normal functions. */
+ gcry_md_hd_t h;
+ gpg_err_code_t err;
+
+ err = md_open (&h, algo, 0);
+ if (err)
+ log_bug ("gcry_md_open failed for algo %d: %s",
+ algo, gpg_strerror (gcry_error(err)));
+ md_write (h, (byte *) buffer, length);
+ md_final (h);
+ memcpy (digest, md_read (h, algo), md_digest_length (algo));
+ md_close (h);
+ }
+}
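
From the caller's side this is the one-shot convenience path. A minimal fragment (SHA-256 assumed, digest length 32, <gcrypt.h> included):

    unsigned char digest[32];

    gcry_md_hash_buffer (GCRY_MD_SHA256, digest, "hello", 5);
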
+
+
+/* Shortcut function to hash multiple buffers with a given algo. In
+ contrast to gcry_md_hash_buffer, this function returns an error on
+ invalid arguments or on other problems; disabled algorithms are
+ _not_ ignored but flagged as an error.
+
+ The data to sign is taken from the array IOV which has IOVCNT items.
+
+ The only supported flag in FLAGS is GCRY_MD_FLAG_HMAC which turns
+ this function into a HMAC function; the first item in IOV is then
+ used as the key.
+
+ On success 0 is returned and resulting hash or HMAC is stored at
+ DIGEST which must have been provided by the caller with an
+ appropriate length. */
+gpg_err_code_t
+_gcry_md_hash_buffers (int algo, unsigned int flags, void *digest,
+ const gcry_buffer_t *iov, int iovcnt)
+{
+ gcry_md_spec_t *spec;
+ int hmac;
+
+ if (!iov || iovcnt < 0)
+ return GPG_ERR_INV_ARG;
+ if (flags & ~(GCRY_MD_FLAG_HMAC))
+ return GPG_ERR_INV_ARG;
+
+ hmac = !!(flags & GCRY_MD_FLAG_HMAC);
+ if (hmac && iovcnt < 1)
+ return GPG_ERR_INV_ARG;
+
+ spec = spec_from_algo (algo);
+ if (!spec)
+ {
+ log_debug ("md_hash_buffers: algorithm %d not available\n", algo);
+ return GPG_ERR_DIGEST_ALGO;
+ }
+
+ if (algo == GCRY_MD_MD5 && fips_mode ())
+ {
+ _gcry_inactivate_fips_mode ("MD5 used");
+ if (_gcry_enforced_fips_mode () )
+ {
+ /* We should never get to here because we do not register
+ MD5 in enforced fips mode. */
+ _gcry_fips_noreturn ();
+ }
+ }
+
+ if (!hmac && spec->hash_buffers)
+ {
+ spec->hash_buffers (digest, iov, iovcnt);
+ }
+ else
+ {
+ /* For the others we do not have a fast function, so we use the
+ normal functions. */
+ gcry_md_hd_t h;
+ gpg_err_code_t rc;
+ int dlen;
+
+ /* Detect SHAKE128 like algorithms which we can't use because
+ * our API does not allow for a variable length digest. */
+ dlen = md_digest_length (algo);
+ if (!dlen)
+ return GPG_ERR_DIGEST_ALGO;
+
+ rc = md_open (&h, algo, (hmac? GCRY_MD_FLAG_HMAC:0));
+ if (rc)
+ return rc;
+
+ if (hmac)
+ {
+ rc = _gcry_md_setkey (h,
+ (const char*)iov[0].data + iov[0].off,
+ iov[0].len);
+ if (rc)
+ {
+ md_close (h);
+ return rc;
+ }
+ iov++; iovcnt--;
+ }
+ for (;iovcnt; iov++, iovcnt--)
+ md_write (h, (const char*)iov[0].data + iov[0].off, iov[0].len);
+ md_final (h);
+ memcpy (digest, md_read (h, algo), dlen);
+ md_close (h);
+ }
+
+ return 0;
+}
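
With GCRY_MD_FLAG_HMAC the first IOV entry supplies the key, as described in the comment above. A hedged fragment (SHA-256 assumed; <string.h> and <gcrypt.h> included):

    gcry_buffer_t iov[2];
    unsigned char mac[32];

    memset (iov, 0, sizeof iov);                   /* clears .size and .off */
    iov[0].data = "secret key";  iov[0].len = 10;  /* key comes first */
    iov[1].data = "hello";       iov[1].len = 5;   /* then the data   */

    gcry_md_hash_buffers (GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC, mac, iov, 2);
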
+
+
+static int
+md_get_algo (gcry_md_hd_t a)
+{
+ GcryDigestEntry *r = a->ctx->list;
+
+ if (r && r->next)
+ {
+ fips_signal_error ("possible usage error");
+ log_error ("WARNING: more than one algorithm in md_get_algo()\n");
+ }
+ return r ? r->spec->algo : 0;
+}
+
+
+int
+_gcry_md_get_algo (gcry_md_hd_t hd)
+{
+ return md_get_algo (hd);
+}
+
+
+/****************
+ * Return the length of the digest
+ */
+static int
+md_digest_length (int algorithm)
+{
+ gcry_md_spec_t *spec;
+
+ spec = spec_from_algo (algorithm);
+ return spec? spec->mdlen : 0;
+}
+
+
+/****************
+ * Return the length of the digest in bytes.
+ * This function will return 0 in case of errors.
+ */
+unsigned int
+_gcry_md_get_algo_dlen (int algorithm)
+{
+ return md_digest_length (algorithm);
+}
+
+
+/* Hmmm: add a mode to enumerate the OIDs
+ * to make g10/sig-check.c more portable */
+static const byte *
+md_asn_oid (int algorithm, size_t *asnlen, size_t *mdlen)
+{
+ gcry_md_spec_t *spec;
+ const byte *asnoid = NULL;
+
+ spec = spec_from_algo (algorithm);
+ if (spec)
+ {
+ if (asnlen)
+ *asnlen = spec->asnlen;
+ if (mdlen)
+ *mdlen = spec->mdlen;
+ asnoid = spec->asnoid;
+ }
+ else
+ log_bug ("no ASN.1 OID for md algo %d\n", algorithm);
+
+ return asnoid;
+}
+
+
+/****************
+ * Return information about the given digest algorithm
+ * WHAT select the kind of information returned:
+ * GCRYCTL_TEST_ALGO:
+ * Returns 0 when the specified algorithm is available for use.
+ * buffer and nbytes must be zero.
+ * GCRYCTL_GET_ASNOID:
+ *     Return the ASNOID of the algorithm in buffer.  If buffer is NULL, only
+ * the required length is returned.
+ * GCRYCTL_SELFTEST
+ * Helper for the regression tests - shall not be used by applications.
+ *
+ * Note: Because this function is in most cases used to return an
+ * integer value, we can make it easier for the caller to just look at
+ * the return value. The caller will in all cases consult the value
+ * and thereby detect whether an error occurred or not (e.g. while checking
+ * the block size).
+ */
+gcry_err_code_t
+_gcry_md_algo_info (int algo, int what, void *buffer, size_t *nbytes)
+{
+ gcry_err_code_t rc;
+
+ switch (what)
+ {
+ case GCRYCTL_TEST_ALGO:
+ if (buffer || nbytes)
+ rc = GPG_ERR_INV_ARG;
+ else
+ rc = check_digest_algo (algo);
+ break;
+
+ case GCRYCTL_GET_ASNOID:
+ /* We need to check that the algo is available because
+ md_asn_oid would otherwise raise an assertion. */
+ rc = check_digest_algo (algo);
+ if (!rc)
+ {
+          const unsigned char *asn;
+ size_t asnlen;
+
+ asn = md_asn_oid (algo, &asnlen, NULL);
+ if (buffer && (*nbytes >= asnlen))
+ {
+ memcpy (buffer, asn, asnlen);
+ *nbytes = asnlen;
+ }
+ else if (!buffer && nbytes)
+ *nbytes = asnlen;
+ else
+ {
+ if (buffer)
+ rc = GPG_ERR_TOO_SHORT;
+ else
+ rc = GPG_ERR_INV_ARG;
+ }
+ }
+ break;
+
+ case GCRYCTL_SELFTEST:
+ /* Helper function for the regression tests. */
+ rc = gpg_err_code (_gcry_md_selftest (algo, nbytes? (int)*nbytes : 0,
+ NULL));
+ break;
+
+ default:
+ rc = GPG_ERR_INV_OP;
+ break;
+ }
+
+ return rc;
+}
+
+
+static void
+md_start_debug ( gcry_md_hd_t md, const char *suffix )
+{
+ static int idx=0;
+ char buf[50];
+
+ if (fips_mode ())
+ return;
+
+ if ( md->ctx->debug )
+ {
+ log_debug("Oops: md debug already started\n");
+ return;
+ }
+ idx++;
+ snprintf (buf, DIM(buf)-1, "dbgmd-%05d.%.10s", idx, suffix );
+ md->ctx->debug = fopen(buf, "w");
+ if ( !md->ctx->debug )
+ log_debug("md debug: can't open %s\n", buf );
+}
+
+
+static void
+md_stop_debug( gcry_md_hd_t md )
+{
+ if ( md->ctx->debug )
+ {
+ if ( md->bufpos )
+ md_write ( md, NULL, 0 );
+ fclose (md->ctx->debug);
+ md->ctx->debug = NULL;
+ }
+
+ { /* a kludge to pull in the __muldi3 for Solaris */
+ volatile u32 a = (u32)(uintptr_t)md;
+ volatile u64 b = 42;
+ volatile u64 c;
+ c = a * b;
+ (void)c;
+ }
+}
+
+
+
+/*
+ * Return information about the digest handle.
+ * GCRYCTL_IS_SECURE:
+ * Returns 1 when the handle works on secured memory
+ * otherwise 0 is returned. There is no error return.
+ * GCRYCTL_IS_ALGO_ENABLED:
+ * Returns 1 if the algo is enabled for that handle.
+ * The algo must be passed as the address of an int.
+ */
+gcry_err_code_t
+_gcry_md_info (gcry_md_hd_t h, int cmd, void *buffer, size_t *nbytes)
+{
+ gcry_err_code_t rc = 0;
+
+ switch (cmd)
+ {
+ case GCRYCTL_IS_SECURE:
+ *nbytes = h->ctx->flags.secure;
+ break;
+
+ case GCRYCTL_IS_ALGO_ENABLED:
+ {
+ GcryDigestEntry *r;
+ int algo;
+
+ if ( !buffer || !nbytes || *nbytes != sizeof (int))
+ rc = GPG_ERR_INV_ARG;
+ else
+ {
+ algo = *(int*)buffer;
+
+ *nbytes = 0;
+ for(r=h->ctx->list; r; r = r->next ) {
+ if (r->spec->algo == algo)
+ {
+ *nbytes = 1;
+ break;
+ }
+ }
+ }
+ break;
+ }
+
+ default:
+ rc = GPG_ERR_INV_OP;
+ }
+
+ return rc;
+}
+
+
+/* Explicitly initialize this module. */
+gcry_err_code_t
+_gcry_md_init (void)
+{
+ if (fips_mode())
+ {
+ /* disable algorithms that are disallowed in fips */
+ int idx;
+ gcry_md_spec_t *spec;
+
+ for (idx = 0; (spec = digest_list[idx]); idx++)
+ if (!spec->flags.fips)
+ spec->flags.disabled = 1;
+ }
+
+ return 0;
+}
+
+
+int
+_gcry_md_is_secure (gcry_md_hd_t a)
+{
+ size_t value;
+
+ if (_gcry_md_info (a, GCRYCTL_IS_SECURE, NULL, &value))
+ value = 1; /* It seems to be better to assume secure memory on
+ error. */
+ return value;
+}
+
+
+int
+_gcry_md_is_enabled (gcry_md_hd_t a, int algo)
+{
+ size_t value;
+
+ value = sizeof algo;
+ if (_gcry_md_info (a, GCRYCTL_IS_ALGO_ENABLED, &algo, &value))
+ value = 0;
+ return value;
+}
+
+
+/* Run the selftests for digest algorithm ALGO with optional reporting
+ function REPORT. */
+gpg_error_t
+_gcry_md_selftest (int algo, int extended, selftest_report_func_t report)
+{
+ gcry_err_code_t ec = 0;
+ gcry_md_spec_t *spec;
+
+ spec = spec_from_algo (algo);
+ if (spec && !spec->flags.disabled && spec->selftest)
+ ec = spec->selftest (algo, extended, report);
+ else
+ {
+ ec = (spec && spec->selftest) ? GPG_ERR_DIGEST_ALGO
+ /* */ : GPG_ERR_NOT_IMPLEMENTED;
+ if (report)
+ report ("digest", algo, "module",
+ (spec && !spec->flags.disabled)?
+ "no selftest available" :
+ spec? "algorithm disabled" : "algorithm not found");
+ }
+
+ return gpg_error (ec);
+}
diff --git a/comm/third_party/libgcrypt/cipher/md4.c b/comm/third_party/libgcrypt/cipher/md4.c
new file mode 100644
index 0000000000..b55443a8aa
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/md4.c
@@ -0,0 +1,296 @@
+/* md4.c - MD4 Message-Digest Algorithm
+ * Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Based on md5.c in libgcrypt, but rewritten to compute md4 checksums
+ * using a public domain md4 implementation with the following comments:
+ *
+ * Modified by Wei Dai from Andrew M. Kuchling's md4.c
+ * The original code and all modifications are in the public domain.
+ *
+ * This is the original introductory comment:
+ *
+ * md4.c : MD4 hash algorithm.
+ *
+ * Part of the Python Cryptography Toolkit, version 1.1
+ *
+ * Distribute and use freely; there are no restrictions on further
+ * dissemination and usage except those imposed by the laws of your
+ * country of residence.
+ *
+ */
+
+/* MD4 test suite:
+ * MD4 ("") = 31d6cfe0d16ae931b73c59d7e0c089c0
+ * MD4 ("a") = bde52cb31de33e46245e05fbdbd6fb24
+ * MD4 ("abc") = a448017aaf21d8525fc10ae87aa6729d
+ * MD4 ("message digest") = d9130a8164549fe818874806e1c7014b
+ * MD4 ("abcdefghijklmnopqrstuvwxyz") = d79e1c308aa5bbcdeea8ed63df412da9
+ * MD4 ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") =
+ * 043f8582f241db351ce627e153e7f0e4
+ * MD4 ("123456789012345678901234567890123456789012345678901234567890123456
+ * 78901234567890") = e33b4ddc9c38f2199c3e7b164fcc0536
+ */
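
The first vector above can be reproduced through the public one-shot call when MD4 is compiled in; a small check fragment (variable names are illustration only):

    unsigned char d[16];
    static const unsigned char expect[16] =
      { 0x31, 0xd6, 0xcf, 0xe0, 0xd1, 0x6a, 0xe9, 0x31,
        0xb7, 0x3c, 0x59, 0xd7, 0xe0, 0xc0, 0x89, 0xc0 };

    gcry_md_hash_buffer (GCRY_MD_MD4, d, "", 0);
    /* memcmp (d, expect, 16) is expected to be 0.  */
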
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "hash-common.h"
+
+
+typedef struct {
+ gcry_md_block_ctx_t bctx;
+ u32 A,B,C,D; /* chaining variables */
+} MD4_CONTEXT;
+
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks );
+
+static void
+md4_init (void *context, unsigned int flags)
+{
+ MD4_CONTEXT *ctx = context;
+
+ (void)flags;
+
+ ctx->A = 0x67452301;
+ ctx->B = 0xefcdab89;
+ ctx->C = 0x98badcfe;
+ ctx->D = 0x10325476;
+
+ ctx->bctx.nblocks = 0;
+ ctx->bctx.nblocks_high = 0;
+ ctx->bctx.count = 0;
+ ctx->bctx.blocksize_shift = _gcry_ctz(64);
+ ctx->bctx.bwrite = transform;
+}
+
+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+
+
+/****************
+ * transform 64 bytes
+ */
+static unsigned int
+transform_blk ( void *c, const unsigned char *data )
+{
+ MD4_CONTEXT *ctx = c;
+ u32 in[16];
+ register u32 A = ctx->A;
+ register u32 B = ctx->B;
+ register u32 C = ctx->C;
+ register u32 D = ctx->D;
+ int i;
+
+ for ( i = 0; i < 16; i++ )
+ in[i] = buf_get_le32(data + i * 4);
+
+ /* Round 1. */
+#define function(a,b,c,d,k,s) a=rol(a+F(b,c,d)+in[k],s);
+ function(A,B,C,D, 0, 3);
+ function(D,A,B,C, 1, 7);
+ function(C,D,A,B, 2,11);
+ function(B,C,D,A, 3,19);
+ function(A,B,C,D, 4, 3);
+ function(D,A,B,C, 5, 7);
+ function(C,D,A,B, 6,11);
+ function(B,C,D,A, 7,19);
+ function(A,B,C,D, 8, 3);
+ function(D,A,B,C, 9, 7);
+ function(C,D,A,B,10,11);
+ function(B,C,D,A,11,19);
+ function(A,B,C,D,12, 3);
+ function(D,A,B,C,13, 7);
+ function(C,D,A,B,14,11);
+ function(B,C,D,A,15,19);
+
+#undef function
+
+ /* Round 2. */
+#define function(a,b,c,d,k,s) a=rol(a+G(b,c,d)+in[k]+0x5a827999,s);
+
+ function(A,B,C,D, 0, 3);
+ function(D,A,B,C, 4, 5);
+ function(C,D,A,B, 8, 9);
+ function(B,C,D,A,12,13);
+ function(A,B,C,D, 1, 3);
+ function(D,A,B,C, 5, 5);
+ function(C,D,A,B, 9, 9);
+ function(B,C,D,A,13,13);
+ function(A,B,C,D, 2, 3);
+ function(D,A,B,C, 6, 5);
+ function(C,D,A,B,10, 9);
+ function(B,C,D,A,14,13);
+ function(A,B,C,D, 3, 3);
+ function(D,A,B,C, 7, 5);
+ function(C,D,A,B,11, 9);
+ function(B,C,D,A,15,13);
+
+#undef function
+
+ /* Round 3. */
+#define function(a,b,c,d,k,s) a=rol(a+H(b,c,d)+in[k]+0x6ed9eba1,s);
+
+ function(A,B,C,D, 0, 3);
+ function(D,A,B,C, 8, 9);
+ function(C,D,A,B, 4,11);
+ function(B,C,D,A,12,15);
+ function(A,B,C,D, 2, 3);
+ function(D,A,B,C,10, 9);
+ function(C,D,A,B, 6,11);
+ function(B,C,D,A,14,15);
+ function(A,B,C,D, 1, 3);
+ function(D,A,B,C, 9, 9);
+ function(C,D,A,B, 5,11);
+ function(B,C,D,A,13,15);
+ function(A,B,C,D, 3, 3);
+ function(D,A,B,C,11, 9);
+ function(C,D,A,B, 7,11);
+ function(B,C,D,A,15,15);
+
+
+ /* Put checksum in context given as argument. */
+ ctx->A += A;
+ ctx->B += B;
+ ctx->C += C;
+ ctx->D += D;
+
+ return /*burn_stack*/ 80+6*sizeof(void*);
+}
+
+
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
+{
+ unsigned int burn;
+
+ do
+ {
+ burn = transform_blk (c, data);
+ data += 64;
+ }
+ while (--nblks);
+
+ return burn;
+}
+
+
+/* The routine final terminates the message-digest computation and
+ * ends with the desired message digest in mdContext->digest[0...15].
+ * The handle is prepared for a new MD4 cycle.
+ * Returns 16 bytes representing the digest.
+ */
+
+static void
+md4_final( void *context )
+{
+ MD4_CONTEXT *hd = context;
+ u32 t, th, msb, lsb;
+ byte *p;
+ unsigned int burn;
+
+ t = hd->bctx.nblocks;
+ if (sizeof t == sizeof hd->bctx.nblocks)
+ th = hd->bctx.nblocks_high;
+ else
+ th = hd->bctx.nblocks >> 32;
+
+ /* multiply by 64 to make a byte count */
+ lsb = t << 6;
+ msb = (th << 6) | (t >> 26);
+ /* add the count */
+ t = lsb;
+ if( (lsb += hd->bctx.count) < t )
+ msb++;
+ /* multiply by 8 to make a bit count */
+ t = lsb;
+ lsb <<= 3;
+ msb <<= 3;
+ msb |= t >> 29;
+
+ if (hd->bctx.count < 56) /* enough room */
+ {
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
+ if (hd->bctx.count < 56)
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+ /* append the 64 bit count */
+ buf_put_le32(hd->bctx.buf + 56, lsb);
+ buf_put_le32(hd->bctx.buf + 60, msb);
+ burn = transform (hd, hd->bctx.buf, 1);
+ }
+ else /* need one extra block */
+ {
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
+ /* fill pad and next block with zeroes */
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+ /* append the 64 bit count */
+ buf_put_le32(hd->bctx.buf + 64 + 56, lsb);
+ buf_put_le32(hd->bctx.buf + 64 + 60, msb);
+ burn = transform (hd, hd->bctx.buf, 2);
+ }
+
+ p = hd->bctx.buf;
+#define X(a) do { buf_put_le32(p, hd->a); p += 4; } while(0)
+ X(A);
+ X(B);
+ X(C);
+ X(D);
+#undef X
+
+ hd->bctx.count = 0;
+
+ _gcry_burn_stack (burn);
+}
+
+static byte *
+md4_read (void *context)
+{
+ MD4_CONTEXT *hd = context;
+ return hd->bctx.buf;
+}
+
+static byte asn[18] = /* Object ID is 1.2.840.113549.2.4 */
+ { 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, 0x2a, 0x86,0x48,
+ 0x86, 0xf7, 0x0d, 0x02, 0x04, 0x05, 0x00, 0x04, 0x10 };
+
+static gcry_md_oid_spec_t oid_spec_md4[] =
+ {
+ /* iso.member-body.us.rsadsi.digestAlgorithm.md4 */
+ { "1.2.840.113549.2.4" },
+ { NULL },
+ };
+
+gcry_md_spec_t _gcry_digest_spec_md4 =
+ {
+ GCRY_MD_MD4, {0, 0},
+ "MD4", asn, DIM (asn), oid_spec_md4,16,
+ md4_init, _gcry_md_block_write, md4_final, md4_read, NULL,
+ NULL, NULL,
+ sizeof (MD4_CONTEXT)
+ };
diff --git a/comm/third_party/libgcrypt/cipher/md5.c b/comm/third_party/libgcrypt/cipher/md5.c
new file mode 100644
index 0000000000..32cb535aaa
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/md5.c
@@ -0,0 +1,322 @@
+/* md5.c - MD5 Message-Digest Algorithm
+ * Copyright (C) 1995,1996,1998,1999,2001,2002,
+ * 2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * According to the definition of MD5 in RFC 1321 from April 1992.
+ * NOTE: This is *not* the same file as the one from glibc.
+ * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
+ * heavily modified for GnuPG by Werner Koch <wk@gnupg.org>
+ */
+
+/* Test values:
+ * "" D4 1D 8C D9 8F 00 B2 04 E9 80 09 98 EC F8 42 7E
+ * "a" 0C C1 75 B9 C0 F1 B6 A8 31 C3 99 E2 69 77 26 61
+ * "abc 90 01 50 98 3C D2 4F B0 D6 96 3F 7D 28 E1 7F 72
+ * "message digest" F9 6B 69 7D 7C B7 93 8D 52 5A 2F 31 AA F1 61 D0
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "hash-common.h"
+
+
+typedef struct {
+ gcry_md_block_ctx_t bctx;
+ u32 A,B,C,D; /* chaining variables */
+} MD5_CONTEXT;
+
+static unsigned int
+transform ( void *ctx, const unsigned char *data, size_t datalen );
+
+static void
+md5_init( void *context, unsigned int flags)
+{
+ MD5_CONTEXT *ctx = context;
+
+ (void)flags;
+
+ ctx->A = 0x67452301;
+ ctx->B = 0xefcdab89;
+ ctx->C = 0x98badcfe;
+ ctx->D = 0x10325476;
+
+ ctx->bctx.nblocks = 0;
+ ctx->bctx.nblocks_high = 0;
+ ctx->bctx.count = 0;
+ ctx->bctx.blocksize_shift = _gcry_ctz(64);
+ ctx->bctx.bwrite = transform;
+}
+
+
+/* These are the four functions used in the four steps of the MD5 algorithm
+   and defined in RFC 1321.  The first function is a little bit optimized
+   (as found in Colin Plumb's public domain implementation). */
+/* #define FF(b, c, d) ((b & c) | (~b & d)) */
+#define FF(b, c, d) (d ^ (b & (c ^ d)))
+#define FG(b, c, d) FF (d, b, c)
+#define FH(b, c, d) (b ^ c ^ d)
+#define FI(b, c, d) (c ^ (b | ~d))
+
+
+/****************
+ * transform 64 bytes
+ */
+static unsigned int
+transform_blk ( void *c, const unsigned char *data )
+{
+ MD5_CONTEXT *ctx = c;
+ u32 correct_words[16];
+ register u32 A = ctx->A;
+ register u32 B = ctx->B;
+ register u32 C = ctx->C;
+ register u32 D = ctx->D;
+ u32 *cwp = correct_words;
+ int i;
+
+ for ( i = 0; i < 16; i++ )
+ correct_words[i] = buf_get_le32(data + i * 4);
+
+#define OP(a, b, c, d, s, T) \
+ do \
+ { \
+ a += FF (b, c, d) + (*cwp++) + T; \
+ a = rol(a, s); \
+ a += b; \
+ } \
+ while (0)
+
+ /* Before we start, one word about the strange constants.
+ They are defined in RFC 1321 as
+
+ T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64
+ */
+
+ /* Round 1. */
+ OP (A, B, C, D, 7, 0xd76aa478);
+ OP (D, A, B, C, 12, 0xe8c7b756);
+ OP (C, D, A, B, 17, 0x242070db);
+ OP (B, C, D, A, 22, 0xc1bdceee);
+ OP (A, B, C, D, 7, 0xf57c0faf);
+ OP (D, A, B, C, 12, 0x4787c62a);
+ OP (C, D, A, B, 17, 0xa8304613);
+ OP (B, C, D, A, 22, 0xfd469501);
+ OP (A, B, C, D, 7, 0x698098d8);
+ OP (D, A, B, C, 12, 0x8b44f7af);
+ OP (C, D, A, B, 17, 0xffff5bb1);
+ OP (B, C, D, A, 22, 0x895cd7be);
+ OP (A, B, C, D, 7, 0x6b901122);
+ OP (D, A, B, C, 12, 0xfd987193);
+ OP (C, D, A, B, 17, 0xa679438e);
+ OP (B, C, D, A, 22, 0x49b40821);
+
+#undef OP
+#define OP(f, a, b, c, d, k, s, T) \
+ do \
+ { \
+ a += f (b, c, d) + correct_words[k] + T; \
+ a = rol(a, s); \
+ a += b; \
+ } \
+ while (0)
+
+ /* Round 2. */
+ OP (FG, A, B, C, D, 1, 5, 0xf61e2562);
+ OP (FG, D, A, B, C, 6, 9, 0xc040b340);
+ OP (FG, C, D, A, B, 11, 14, 0x265e5a51);
+ OP (FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
+ OP (FG, A, B, C, D, 5, 5, 0xd62f105d);
+ OP (FG, D, A, B, C, 10, 9, 0x02441453);
+ OP (FG, C, D, A, B, 15, 14, 0xd8a1e681);
+ OP (FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
+ OP (FG, A, B, C, D, 9, 5, 0x21e1cde6);
+ OP (FG, D, A, B, C, 14, 9, 0xc33707d6);
+ OP (FG, C, D, A, B, 3, 14, 0xf4d50d87);
+ OP (FG, B, C, D, A, 8, 20, 0x455a14ed);
+ OP (FG, A, B, C, D, 13, 5, 0xa9e3e905);
+ OP (FG, D, A, B, C, 2, 9, 0xfcefa3f8);
+ OP (FG, C, D, A, B, 7, 14, 0x676f02d9);
+ OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
+
+ /* Round 3. */
+ OP (FH, A, B, C, D, 5, 4, 0xfffa3942);
+ OP (FH, D, A, B, C, 8, 11, 0x8771f681);
+ OP (FH, C, D, A, B, 11, 16, 0x6d9d6122);
+ OP (FH, B, C, D, A, 14, 23, 0xfde5380c);
+ OP (FH, A, B, C, D, 1, 4, 0xa4beea44);
+ OP (FH, D, A, B, C, 4, 11, 0x4bdecfa9);
+ OP (FH, C, D, A, B, 7, 16, 0xf6bb4b60);
+ OP (FH, B, C, D, A, 10, 23, 0xbebfbc70);
+ OP (FH, A, B, C, D, 13, 4, 0x289b7ec6);
+ OP (FH, D, A, B, C, 0, 11, 0xeaa127fa);
+ OP (FH, C, D, A, B, 3, 16, 0xd4ef3085);
+ OP (FH, B, C, D, A, 6, 23, 0x04881d05);
+ OP (FH, A, B, C, D, 9, 4, 0xd9d4d039);
+ OP (FH, D, A, B, C, 12, 11, 0xe6db99e5);
+ OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8);
+ OP (FH, B, C, D, A, 2, 23, 0xc4ac5665);
+
+ /* Round 4. */
+ OP (FI, A, B, C, D, 0, 6, 0xf4292244);
+ OP (FI, D, A, B, C, 7, 10, 0x432aff97);
+ OP (FI, C, D, A, B, 14, 15, 0xab9423a7);
+ OP (FI, B, C, D, A, 5, 21, 0xfc93a039);
+ OP (FI, A, B, C, D, 12, 6, 0x655b59c3);
+ OP (FI, D, A, B, C, 3, 10, 0x8f0ccc92);
+ OP (FI, C, D, A, B, 10, 15, 0xffeff47d);
+ OP (FI, B, C, D, A, 1, 21, 0x85845dd1);
+ OP (FI, A, B, C, D, 8, 6, 0x6fa87e4f);
+ OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
+ OP (FI, C, D, A, B, 6, 15, 0xa3014314);
+ OP (FI, B, C, D, A, 13, 21, 0x4e0811a1);
+ OP (FI, A, B, C, D, 4, 6, 0xf7537e82);
+ OP (FI, D, A, B, C, 11, 10, 0xbd3af235);
+ OP (FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
+ OP (FI, B, C, D, A, 9, 21, 0xeb86d391);
+
+ /* Put checksum in context given as argument. */
+ ctx->A += A;
+ ctx->B += B;
+ ctx->C += C;
+ ctx->D += D;
+
+ return /*burn_stack*/ 80+6*sizeof(void*);
+}
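
The comment inside transform_blk() gives the origin of the 64 round constants.  A tiny stand-alone program (editor's sketch, not part of the patch) reproduces the first four Round 1 values used in the OP() calls above:

#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* Editor's sketch (not part of the patch): reproduce the first MD5 round
 * constants from the RFC 1321 formula T[i] = floor(2^32 * |sin(i)|), with
 * i in radians.  Expected output: d76aa478 e8c7b756 242070db c1bdceee. */
int main (void)
{
  int i;

  for (i = 1; i <= 4; i++)
    printf ("%08x\n",
            (uint32_t) floor (4294967296.0 * fabs (sin ((double) i))));
  return 0;
}
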
+
+
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
+{
+ unsigned int burn;
+
+ do
+ {
+ burn = transform_blk (c, data);
+ data += 64;
+ }
+ while (--nblks);
+
+ return burn;
+}
+
+
+/* The routine final terminates the message-digest computation and
+ * ends with the desired message digest in mdContext->digest[0...15].
+ * The handle is prepared for a new MD5 cycle.
+ * Returns 16 bytes representing the digest.
+ */
+
+static void
+md5_final( void *context)
+{
+ MD5_CONTEXT *hd = context;
+ u32 t, th, msb, lsb;
+ byte *p;
+ unsigned int burn;
+
+ t = hd->bctx.nblocks;
+ if (sizeof t == sizeof hd->bctx.nblocks)
+ th = hd->bctx.nblocks_high;
+ else
+ th = hd->bctx.nblocks >> 32;
+
+ /* multiply by 64 to make a byte count */
+ lsb = t << 6;
+ msb = (th << 6) | (t >> 26);
+ /* add the count */
+ t = lsb;
+ if( (lsb += hd->bctx.count) < t )
+ msb++;
+ /* multiply by 8 to make a bit count */
+ t = lsb;
+ lsb <<= 3;
+ msb <<= 3;
+ msb |= t >> 29;
+
+ if (hd->bctx.count < 56) /* enough room */
+ {
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
+ if (hd->bctx.count < 56)
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+ /* append the 64 bit count */
+ buf_put_le32(hd->bctx.buf + 56, lsb);
+ buf_put_le32(hd->bctx.buf + 60, msb);
+ burn = transform (hd, hd->bctx.buf, 1);
+ }
+ else /* need one extra block */
+ {
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
+ /* fill pad and next block with zeroes */
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+ /* append the 64 bit count */
+ buf_put_le32(hd->bctx.buf + 64 + 56, lsb);
+ buf_put_le32(hd->bctx.buf + 64 + 60, msb);
+ burn = transform (hd, hd->bctx.buf, 2);
+ }
+
+ p = hd->bctx.buf;
+#define X(a) do { buf_put_le32(p, hd->a); p += 4; } while(0)
+ X(A);
+ X(B);
+ X(C);
+ X(D);
+#undef X
+
+ hd->bctx.count = 0;
+
+ _gcry_burn_stack (burn);
+}
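
The lsb/msb computation in md5_final() above is simply the total message length in bits carried in two 32-bit words: the block counter is scaled to bytes (<< 6), the buffered byte count is added, and the sum is scaled to bits (<< 3) with the spilled high bits collected in msb.  Condensed into 64-bit arithmetic (editor's sketch, not part of the patch; the counter values are hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Editor's sketch (not part of the patch): the 64-bit length field that
 * md5_final() appends, for hypothetical running totals of `nblocks` full
 * 64-byte blocks plus `count` buffered bytes. */
int main (void)
{
  uint32_t nblocks = 3, count = 10;              /* hypothetical totals */
  uint64_t bits = ((uint64_t) nblocks * 64 + count) * 8;

  printf ("lsb=%08x msb=%08x\n", (uint32_t) bits, (uint32_t) (bits >> 32));
  return 0;
}
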
+
+static byte *
+md5_read( void *context )
+{
+ MD5_CONTEXT *hd = (MD5_CONTEXT *) context;
+ return hd->bctx.buf;
+}
+
+static byte asn[18] = /* Object ID is 1.2.840.113549.2.5 */
+ { 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, 0x2a, 0x86,0x48,
+ 0x86, 0xf7, 0x0d, 0x02, 0x05, 0x05, 0x00, 0x04, 0x10 };
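
These 18 bytes are the DER-encoded DigestInfo prefix placed in front of the 16-byte digest for PKCS#1 v1.5 signatures.  Decoded (editor's note):

  30 20                               SEQUENCE, 0x20 = 32 bytes follow (prefix + digest)
     30 0c                            SEQUENCE, 12 bytes
        06 08 2a 86 48 86 f7 0d 02 05    OID 1.2.840.113549.2.5 (md5)
        05 00                         NULL (no parameters)
     04 10                            OCTET STRING, 16 bytes: the MD5 digest follows
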
+
+static gcry_md_oid_spec_t oid_spec_md5[] =
+ {
+ /* iso.member-body.us.rsadsi.pkcs.pkcs-1.4 (md5WithRSAEncryption) */
+ { "1.2.840.113549.1.1.4" },
+ /* RSADSI digestAlgorithm MD5 */
+ { "1.2.840.113549.2.5" },
+ { NULL },
+ };
+
+gcry_md_spec_t _gcry_digest_spec_md5 =
+ {
+ GCRY_MD_MD5, {0, 0},
+ "MD5", asn, DIM (asn), oid_spec_md5, 16,
+ md5_init, _gcry_md_block_write, md5_final, md5_read, NULL,
+ NULL, NULL,
+ sizeof (MD5_CONTEXT)
+ };
diff --git a/comm/third_party/libgcrypt/cipher/poly1305-internal.h b/comm/third_party/libgcrypt/cipher/poly1305-internal.h
new file mode 100644
index 0000000000..19cee5f6f3
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/poly1305-internal.h
@@ -0,0 +1,64 @@
+/* poly1305-internal.h - Poly1305 internals
+ * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef G10_POLY1305_INTERNAL_H
+#define G10_POLY1305_INTERNAL_H
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+
+#define POLY1305_TAGLEN 16
+#define POLY1305_KEYLEN 32
+#define POLY1305_BLOCKSIZE 16
+
+
+typedef struct
+{
+ u32 k[4];
+ u32 r[4];
+ u32 h[5];
+} POLY1305_STATE;
+
+typedef struct poly1305_context_s
+{
+ POLY1305_STATE state;
+ byte buffer[POLY1305_BLOCKSIZE];
+ unsigned int leftover;
+} poly1305_context_t;
+
+
+gcry_err_code_t _gcry_poly1305_init (poly1305_context_t *ctx, const byte *key,
+ size_t keylen);
+
+void _gcry_poly1305_finish (poly1305_context_t *ctx,
+ byte mac[POLY1305_TAGLEN]);
+
+void _gcry_poly1305_update (poly1305_context_t *ctx, const byte *buf,
+ size_t buflen);
+
+unsigned int _gcry_poly1305_update_burn (poly1305_context_t *ctx,
+ const byte *m, size_t bytes);
+
+#endif /* G10_POLY1305_INTERNAL_H */
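
For orientation, the functions declared above are driven in the usual one-shot MAC pattern.  The sketch below mirrors the internal poly1305_auth() helper defined later in this patch in poly1305.c; it illustrates the internal API and is not a public libgcrypt interface (editor's sketch, not part of the patch):

/* Editor's sketch (not part of the patch): one-shot Poly1305 tag over a
 * message, using the internal API declared above. */
static void
compute_tag (byte mac[POLY1305_TAGLEN], const byte *msg, size_t msglen,
             const byte key[POLY1305_KEYLEN])
{
  poly1305_context_t ctx;

  memset (&ctx, 0, sizeof ctx);
  _gcry_poly1305_init (&ctx, key, POLY1305_KEYLEN);  /* clamp r, store s */
  _gcry_poly1305_update (&ctx, msg, msglen);         /* absorb the message */
  _gcry_poly1305_finish (&ctx, mac);                 /* write the 16-byte tag */
  wipememory (&ctx, sizeof ctx);                     /* clear key material */
}
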
diff --git a/comm/third_party/libgcrypt/cipher/poly1305-s390x.S b/comm/third_party/libgcrypt/cipher/poly1305-s390x.S
new file mode 100644
index 0000000000..844245f6ad
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/poly1305-s390x.S
@@ -0,0 +1,87 @@
+/* poly1305-s390x.S - zSeries implementation of Poly1305
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+
+#include "asm-poly1305-s390x.h"
+
+.text
+
+.balign 8
+.globl _gcry_poly1305_s390x_blocks1
+ELF(.type _gcry_poly1305_s390x_blocks1,@function;)
+
+_gcry_poly1305_s390x_blocks1:
+ /* input:
+ * %r2: poly1305-state
+ * %r3: src
+ * %r4: len
+ * %r5: high_pad
+ */
+ CFI_STARTPROC();
+
+ stmg %r6, %r14, 6 * 8(%r15);
+
+ lgr POLY_RSTATE, %r2;
+ lgr POLY_RSRC, %r3;
+ srlg %r0, %r4, 4;
+
+ cgije %r5, 0, .Lpoly_high0;
+
+ POLY1305_LOAD_STATE();
+
+.balign 4
+.Lpoly_loop_high1:
+ POLY1305_BLOCK_PART1(0 * 16);
+ INC_POLY1305_SRC(1 * 16);
+.Lpoly_block_part2:
+ POLY1305_BLOCK_PART2();
+ POLY1305_BLOCK_PART3();
+ POLY1305_BLOCK_PART4();
+ POLY1305_BLOCK_PART5();
+ POLY1305_BLOCK_PART6();
+ POLY1305_BLOCK_PART7();
+ POLY1305_BLOCK_PART8();
+
+ brctg %r0, .Lpoly_loop_high1;
+
+.balign 4
+.Lpoly_done:
+ POLY1305_STORE_STATE();
+
+ lmg %r6, %r14, 6 * 8(%r15);
+ xgr %r2, %r2;
+ br %r14;
+
+.balign 4
+.Lpoly_high0:
+ lghi %r0, 1;
+ POLY1305_LOAD_STATE();
+ POLY1305_BLOCK_PART1_HB(0 * 16, 0);
+ j .Lpoly_block_part2;
+
+ CFI_ENDPROC();
+ELF(.size _gcry_poly1305_s390x_blocks1,
+ .-_gcry_poly1305_s390x_blocks1;)
+
+#endif /*HAVE_GCC_INLINE_ASM_S390X*/
+#endif /*__s390x__*/
diff --git a/comm/third_party/libgcrypt/cipher/poly1305.c b/comm/third_party/libgcrypt/cipher/poly1305.c
new file mode 100644
index 0000000000..6cb4d2b72d
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/poly1305.c
@@ -0,0 +1,740 @@
+/* poly1305.c - Poly1305 internals and generic implementation
+ * Copyright (C) 2014,2017,2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "poly1305-internal.h"
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+
+static const char *selftest (void);
+
+
+#undef HAVE_ASM_POLY1305_BLOCKS
+
+
+#undef USE_MPI_64BIT
+#undef USE_MPI_32BIT
+#if BYTES_PER_MPI_LIMB == 8 && defined(HAVE_TYPE_U64)
+# define USE_MPI_64BIT 1
+#elif BYTES_PER_MPI_LIMB == 4
+# define USE_MPI_32BIT 1
+#else
+# error please implement for this limb size.
+#endif
+
+
+/* USE_S390X_ASM indicates whether to enable zSeries code. */
+#undef USE_S390X_ASM
+#if BYTES_PER_MPI_LIMB == 8
+# if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9
+# if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define USE_S390X_ASM 1
+#  endif /* HAVE_GCC_INLINE_ASM_S390X */
+# endif
+#endif
+
+
+#ifdef USE_S390X_ASM
+
+#define HAVE_ASM_POLY1305_BLOCKS 1
+
+extern unsigned int _gcry_poly1305_s390x_blocks1(void *state,
+ const byte *buf, size_t len,
+ byte high_pad);
+
+static unsigned int
+poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
+ byte high_pad)
+{
+ return _gcry_poly1305_s390x_blocks1(&ctx->state, buf, len, high_pad);
+}
+
+#endif /* USE_S390X_ASM */
+
+
+static void poly1305_init (poly1305_context_t *ctx,
+ const byte key[POLY1305_KEYLEN])
+{
+ POLY1305_STATE *st = &ctx->state;
+
+ ctx->leftover = 0;
+
+ st->h[0] = 0;
+ st->h[1] = 0;
+ st->h[2] = 0;
+ st->h[3] = 0;
+ st->h[4] = 0;
+
+ st->r[0] = buf_get_le32(key + 0) & 0x0fffffff;
+ st->r[1] = buf_get_le32(key + 4) & 0x0ffffffc;
+ st->r[2] = buf_get_le32(key + 8) & 0x0ffffffc;
+ st->r[3] = buf_get_le32(key + 12) & 0x0ffffffc;
+
+ st->k[0] = buf_get_le32(key + 16);
+ st->k[1] = buf_get_le32(key + 20);
+ st->k[2] = buf_get_le32(key + 24);
+ st->k[3] = buf_get_le32(key + 28);
+}
+
+
+#ifdef USE_MPI_64BIT
+
+#if defined (__aarch64__) && __GNUC__ >= 4
+
+/* A += B (armv8/aarch64) */
+#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \
+ __asm__ ("adds %0, %3, %0\n" \
+ "adcs %1, %4, %1\n" \
+ "adc %2, %5, %2\n" \
+ : "+r" (A0), "+r" (A1), "+r" (A2) \
+ : "r" (B0), "r" (B1), "r" (B2) \
+ : "cc" )
+
+#endif /* __aarch64__ */
+
+#if defined (__x86_64__) && __GNUC__ >= 4
+
+/* A += B (x86-64) */
+#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \
+ __asm__ ("addq %3, %0\n" \
+ "adcq %4, %1\n" \
+ "adcq %5, %2\n" \
+ : "+r" (A0), "+r" (A1), "+r" (A2) \
+ : "g" (B0), "g" (B1), "g" (B2) \
+ : "cc" )
+
+#endif /* __x86_64__ */
+
+#if defined (__powerpc__) && __GNUC__ >= 4
+
+/* A += B (ppc64) */
+#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \
+ __asm__ ("addc %0, %3, %0\n" \
+ "adde %1, %4, %1\n" \
+ "adde %2, %5, %2\n" \
+ : "+r" (A0), "+r" (A1), "+r" (A2) \
+ : "r" (B0), "r" (B1), "r" (B2) \
+ : "cc" )
+
+#endif /* __powerpc__ */
+
+#ifndef ADD_1305_64
+/* A += B (generic, mpi) */
+# define ADD_1305_64(A2, A1, A0, B2, B1, B0) do { \
+ u64 carry; \
+ add_ssaaaa(carry, A0, 0, A0, 0, B0); \
+ add_ssaaaa(A2, A1, A2, A1, B2, B1); \
+ add_ssaaaa(A2, A1, A2, A1, 0, carry); \
+ } while (0)
+#endif
+
+/* H = H * R mod 2¹³⁰-5 */
+#define MUL_MOD_1305_64(H2, H1, H0, R1, R0, R1_MULT5) do { \
+ u64 x0_lo, x0_hi, x1_lo, x1_hi; \
+ u64 t0_lo, t0_hi, t1_lo, t1_hi; \
+ \
+ /* x = a * r (partial mod 2^130-5) */ \
+ umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \
+ umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \
+ \
+ umul_ppmm(t0_hi, t0_lo, H1, R1_MULT5); /* h1 * r1 mod 2^130-5 */ \
+ add_ssaaaa(x0_hi, x0_lo, x0_hi, x0_lo, t0_hi, t0_lo); \
+ umul_ppmm(t1_hi, t1_lo, H1, R0); /* h1 * r0 */ \
+ add_ssaaaa(x1_hi, x1_lo, x1_hi, x1_lo, t1_hi, t1_lo); \
+ \
+ t1_lo = H2 * R1_MULT5; /* h2 * r1 mod 2^130-5 */ \
+ t1_hi = H2 * R0; /* h2 * r0 */ \
+ add_ssaaaa(H0, H1, x1_hi, x1_lo, t1_hi, t1_lo); \
+ \
+ /* carry propagation */ \
+ H2 = H0 & 3; \
+ H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \
+ ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \
+ } while (0)
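
The R1_MULT5 parameter and the final (H0 >> 2) * 5 step both rest on the same identity: 2^130 == 5 (mod 2^130 - 5).  For the cross term the reduction works out as follows (editor's note; r1's low two bits are zero because of the clamping in poly1305_init):

  (h1 * 2^64) * (r1 * 2^64) = h1 * r1 * 2^128
                            = h1 * (r1 >> 2) * 2^130     (low two bits of r1 are 0)
                            == h1 * (r1 >> 2) * 5        (2^130 == 5 mod 2^130 - 5)
                            = h1 * ((r1 >> 2) + r1)      (5*x = 4*x + x, and 4*(r1 >> 2) = r1)
                            = h1 * R1_MULT5

The carry-propagation step folds bits at position 130 and above back in the same way: the two low bits of the top limb stay in H2, and everything above them is multiplied by 5 and added back at the bottom.
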
+
+#ifndef HAVE_ASM_POLY1305_BLOCKS
+
+static unsigned int
+poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
+ byte high_pad)
+{
+ POLY1305_STATE *st = &ctx->state;
+ u64 r0, r1, r1_mult5;
+ u64 h0, h1, h2;
+ u64 m0, m1, m2;
+
+ m2 = high_pad;
+
+ h0 = st->h[0] + ((u64)st->h[1] << 32);
+ h1 = st->h[2] + ((u64)st->h[3] << 32);
+ h2 = st->h[4];
+
+ r0 = st->r[0] + ((u64)st->r[1] << 32);
+ r1 = st->r[2] + ((u64)st->r[3] << 32);
+
+ r1_mult5 = (r1 >> 2) + r1;
+
+ m0 = buf_get_le64(buf + 0);
+ m1 = buf_get_le64(buf + 8);
+ buf += POLY1305_BLOCKSIZE;
+ len -= POLY1305_BLOCKSIZE;
+
+ while (len >= POLY1305_BLOCKSIZE)
+ {
+ /* a = h + m */
+ ADD_1305_64(h2, h1, h0, m2, m1, m0);
+
+ m0 = buf_get_le64(buf + 0);
+ m1 = buf_get_le64(buf + 8);
+
+ /* h = a * r (partial mod 2^130-5) */
+ MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5);
+
+ buf += POLY1305_BLOCKSIZE;
+ len -= POLY1305_BLOCKSIZE;
+ }
+
+ /* a = h + m */
+ ADD_1305_64(h2, h1, h0, m2, m1, m0);
+
+ /* h = a * r (partial mod 2^130-5) */
+ MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5);
+
+ st->h[0] = h0;
+ st->h[1] = h0 >> 32;
+ st->h[2] = h1;
+ st->h[3] = h1 >> 32;
+ st->h[4] = h2;
+
+ return 6 * sizeof (void *) + 18 * sizeof (u64);
+}
+
+#endif /* !HAVE_ASM_POLY1305_BLOCKS */
+
+static unsigned int poly1305_final (poly1305_context_t *ctx,
+ byte mac[POLY1305_TAGLEN])
+{
+ POLY1305_STATE *st = &ctx->state;
+ unsigned int burn = 0;
+ u64 u, carry;
+ u64 k0, k1;
+ u64 h0, h1;
+ u64 h2;
+
+ /* process the remaining block */
+ if (ctx->leftover)
+ {
+ ctx->buffer[ctx->leftover++] = 1;
+ if (ctx->leftover < POLY1305_BLOCKSIZE)
+ {
+ memset (&ctx->buffer[ctx->leftover], 0,
+ POLY1305_BLOCKSIZE - ctx->leftover);
+ ctx->leftover = POLY1305_BLOCKSIZE;
+ }
+ burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0);
+ }
+
+ h0 = st->h[0] + ((u64)st->h[1] << 32);
+ h1 = st->h[2] + ((u64)st->h[3] << 32);
+ h2 = st->h[4];
+
+ k0 = st->k[0] + ((u64)st->k[1] << 32);
+ k1 = st->k[2] + ((u64)st->k[3] << 32);
+
+ /* check if h is more than 2^130-5, by adding 5. */
+ add_ssaaaa(carry, u, 0, h0, 0, 5);
+ add_ssaaaa(carry, u, 0, carry, 0, h1);
+ u = (carry + h2) >> 2; /* u == 0 or 1 */
+
+ /* minus 2^130-5 ... (+5) */
+ u = (-u) & 5;
+ add_ssaaaa(h1, h0, h1, h0, 0, u);
+
+ /* add high part of key + h */
+ add_ssaaaa(h1, h0, h1, h0, k1, k0);
+ buf_put_le64(mac + 0, h0);
+ buf_put_le64(mac + 8, h1);
+
+ /* burn_stack */
+ return 4 * sizeof (void *) + 7 * sizeof (u64) + burn;
+}
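
The (-u) & 5 step in poly1305_final() is the constant-time form of the conditional subtraction of the modulus (editor's note): u is 1 exactly when h >= 2^130 - 5, so (-u) & 5 is 5 in that case and 0 otherwise.  Adding 5 gives h + 5 = (h - (2^130 - 5)) + 2^130, and since only the low 128 bits of h + s are written to the tag, the extra 2^130 is discarded, i.e.

  u == 1:  tag = (h - (2^130 - 5) + s) mod 2^128
  u == 0:  tag = (h + s) mod 2^128
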
+
+#endif /* USE_MPI_64BIT */
+
+#ifdef USE_MPI_32BIT
+
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+/* HI:LO += A * B (arm) */
+#define UMUL_ADD_32(HI, LO, A, B) \
+ __asm__ ("umlal %1, %0, %4, %5" \
+ : "=r" (HI), "=r" (LO) \
+ : "0" (HI), "1" (LO), "r" (A), "r" (B) )
+
+/* A += B (arm) */
+#define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \
+ __asm__ ("adds %0, %0, %5\n" \
+ "adcs %1, %1, %6\n" \
+ "adcs %2, %2, %7\n" \
+ "adcs %3, %3, %8\n" \
+ "adc %4, %4, %9\n" \
+ : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \
+ : "r" (B0), "r" (B1), "r" (B2), "r" (B3), "r" (B4) \
+ : "cc" )
+
+#endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */
+
+#if defined (__i386__) && __GNUC__ >= 4
+
+/* A += B (i386) */
+#define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \
+ __asm__ ("addl %5, %0\n" \
+ "adcl %6, %1\n" \
+ "adcl %7, %2\n" \
+ "adcl %8, %3\n" \
+ "adcl %9, %4\n" \
+ : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \
+ : "g" (B0), "g" (B1), "g" (B2), "g" (B3), "g" (B4) \
+ : "cc" )
+
+#endif /* __i386__ */
+
+#ifndef UMUL_ADD_32
+/* HI:LO += A * B (generic, mpi) */
+# define UMUL_ADD_32(HI, LO, A, B) do { \
+ u32 t_lo, t_hi; \
+ umul_ppmm(t_hi, t_lo, A, B); \
+ add_ssaaaa(HI, LO, HI, LO, t_hi, t_lo); \
+ } while (0)
+#endif
+
+#ifndef ADD_1305_32
+/* A += B (generic, mpi) */
+# define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \
+ u32 carry0, carry1, carry2; \
+ add_ssaaaa(carry0, A0, 0, A0, 0, B0); \
+ add_ssaaaa(carry1, A1, 0, A1, 0, B1); \
+ add_ssaaaa(carry1, A1, carry1, A1, 0, carry0); \
+ add_ssaaaa(carry2, A2, 0, A2, 0, B2); \
+ add_ssaaaa(carry2, A2, carry2, A2, 0, carry1); \
+ add_ssaaaa(A4, A3, A4, A3, B4, B3); \
+ add_ssaaaa(A4, A3, A4, A3, 0, carry2); \
+ } while (0)
+#endif
+
+/* H = H * R mod 2¹³⁰-5 */
+#define MUL_MOD_1305_32(H4, H3, H2, H1, H0, R3, R2, R1, R0, \
+ R3_MULT5, R2_MULT5, R1_MULT5) do { \
+ u32 x0_lo, x0_hi, x1_lo, x1_hi, x2_lo, x2_hi, x3_lo, x3_hi; \
+ u32 t0_lo, t0_hi; \
+ \
+ /* x = a * r (partial mod 2^130-5) */ \
+ umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \
+ umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \
+ umul_ppmm(x2_hi, x2_lo, H0, R2); /* h0 * r2 */ \
+ umul_ppmm(x3_hi, x3_lo, H0, R3); /* h0 * r3 */ \
+ \
+ UMUL_ADD_32(x0_hi, x0_lo, H1, R3_MULT5); /* h1 * r3 mod 2^130-5 */ \
+ UMUL_ADD_32(x1_hi, x1_lo, H1, R0); /* h1 * r0 */ \
+ UMUL_ADD_32(x2_hi, x2_lo, H1, R1); /* h1 * r1 */ \
+ UMUL_ADD_32(x3_hi, x3_lo, H1, R2); /* h1 * r2 */ \
+ \
+ UMUL_ADD_32(x0_hi, x0_lo, H2, R2_MULT5); /* h2 * r2 mod 2^130-5 */ \
+ UMUL_ADD_32(x1_hi, x1_lo, H2, R3_MULT5); /* h2 * r3 mod 2^130-5 */ \
+ UMUL_ADD_32(x2_hi, x2_lo, H2, R0); /* h2 * r0 */ \
+ UMUL_ADD_32(x3_hi, x3_lo, H2, R1); /* h2 * r1 */ \
+ \
+ UMUL_ADD_32(x0_hi, x0_lo, H3, R1_MULT5); /* h3 * r1 mod 2^130-5 */ \
+ H1 = x0_hi; \
+ UMUL_ADD_32(x1_hi, x1_lo, H3, R2_MULT5); /* h3 * r2 mod 2^130-5 */ \
+ UMUL_ADD_32(x2_hi, x2_lo, H3, R3_MULT5); /* h3 * r3 mod 2^130-5 */ \
+ UMUL_ADD_32(x3_hi, x3_lo, H3, R0); /* h3 * r0 */ \
+ \
+ t0_lo = H4 * R1_MULT5; /* h4 * r1 mod 2^130-5 */ \
+ t0_hi = H4 * R2_MULT5; /* h4 * r2 mod 2^130-5 */ \
+ add_ssaaaa(H2, x1_lo, x1_hi, x1_lo, 0, t0_lo); \
+ add_ssaaaa(H3, x2_lo, x2_hi, x2_lo, 0, t0_hi); \
+ t0_lo = H4 * R3_MULT5; /* h4 * r3 mod 2^130-5 */ \
+ t0_hi = H4 * R0; /* h4 * r0 */ \
+ add_ssaaaa(H4, x3_lo, x3_hi, x3_lo, t0_hi, t0_lo); \
+ \
+ /* carry propagation */ \
+ H0 = (H4 >> 2) * 5; /* msb mod 2^130-5 */ \
+ H4 = H4 & 3; \
+ ADD_1305_32(H4, H3, H2, H1, H0, 0, x3_lo, x2_lo, x1_lo, x0_lo); \
+ } while (0)
+
+#ifndef HAVE_ASM_POLY1305_BLOCKS
+
+static unsigned int
+poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
+ byte high_pad)
+{
+ POLY1305_STATE *st = &ctx->state;
+ u32 r1_mult5, r2_mult5, r3_mult5;
+ u32 h0, h1, h2, h3, h4;
+ u32 m0, m1, m2, m3, m4;
+
+ m4 = high_pad;
+
+ h0 = st->h[0];
+ h1 = st->h[1];
+ h2 = st->h[2];
+ h3 = st->h[3];
+ h4 = st->h[4];
+
+ r1_mult5 = (st->r[1] >> 2) + st->r[1];
+ r2_mult5 = (st->r[2] >> 2) + st->r[2];
+ r3_mult5 = (st->r[3] >> 2) + st->r[3];
+
+ while (len >= POLY1305_BLOCKSIZE)
+ {
+ m0 = buf_get_le32(buf + 0);
+ m1 = buf_get_le32(buf + 4);
+ m2 = buf_get_le32(buf + 8);
+ m3 = buf_get_le32(buf + 12);
+
+ /* a = h + m */
+ ADD_1305_32(h4, h3, h2, h1, h0, m4, m3, m2, m1, m0);
+
+ /* h = a * r (partial mod 2^130-5) */
+ MUL_MOD_1305_32(h4, h3, h2, h1, h0,
+ st->r[3], st->r[2], st->r[1], st->r[0],
+ r3_mult5, r2_mult5, r1_mult5);
+
+ buf += POLY1305_BLOCKSIZE;
+ len -= POLY1305_BLOCKSIZE;
+ }
+
+ st->h[0] = h0;
+ st->h[1] = h1;
+ st->h[2] = h2;
+ st->h[3] = h3;
+ st->h[4] = h4;
+
+ return 6 * sizeof (void *) + 28 * sizeof (u32);
+}
+
+#endif /* !HAVE_ASM_POLY1305_BLOCKS */
+
+static unsigned int poly1305_final (poly1305_context_t *ctx,
+ byte mac[POLY1305_TAGLEN])
+{
+ POLY1305_STATE *st = &ctx->state;
+ unsigned int burn = 0;
+ u32 carry, tmp0, tmp1, tmp2, u;
+ u32 h4, h3, h2, h1, h0;
+
+ /* process the remaining block */
+ if (ctx->leftover)
+ {
+ ctx->buffer[ctx->leftover++] = 1;
+ if (ctx->leftover < POLY1305_BLOCKSIZE)
+ {
+ memset (&ctx->buffer[ctx->leftover], 0,
+ POLY1305_BLOCKSIZE - ctx->leftover);
+ ctx->leftover = POLY1305_BLOCKSIZE;
+ }
+ burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0);
+ }
+
+ h0 = st->h[0];
+ h1 = st->h[1];
+ h2 = st->h[2];
+ h3 = st->h[3];
+ h4 = st->h[4];
+
+ /* check if h is more than 2^130-5, by adding 5. */
+ add_ssaaaa(carry, tmp0, 0, h0, 0, 5);
+ add_ssaaaa(carry, tmp0, 0, carry, 0, h1);
+ add_ssaaaa(carry, tmp0, 0, carry, 0, h2);
+ add_ssaaaa(carry, tmp0, 0, carry, 0, h3);
+ u = (carry + h4) >> 2; /* u == 0 or 1 */
+
+ /* minus 2^130-5 ... (+5) */
+ u = (-u) & 5;
+ add_ssaaaa(carry, h0, 0, h0, 0, u);
+ add_ssaaaa(carry, h1, 0, h1, 0, carry);
+ add_ssaaaa(carry, h2, 0, h2, 0, carry);
+ add_ssaaaa(carry, h3, 0, h3, 0, carry);
+
+ /* add high part of key + h */
+ add_ssaaaa(tmp0, h0, 0, h0, 0, st->k[0]);
+ add_ssaaaa(tmp1, h1, 0, h1, 0, st->k[1]);
+ add_ssaaaa(tmp1, h1, tmp1, h1, 0, tmp0);
+ add_ssaaaa(tmp2, h2, 0, h2, 0, st->k[2]);
+ add_ssaaaa(tmp2, h2, tmp2, h2, 0, tmp1);
+ add_ssaaaa(carry, h3, 0, h3, 0, st->k[3]);
+ h3 += tmp2;
+
+ buf_put_le32(mac + 0, h0);
+ buf_put_le32(mac + 4, h1);
+ buf_put_le32(mac + 8, h2);
+ buf_put_le32(mac + 12, h3);
+
+ /* burn_stack */
+ return 4 * sizeof (void *) + 10 * sizeof (u32) + burn;
+}
+
+#endif /* USE_MPI_32BIT */
+
+
+unsigned int
+_gcry_poly1305_update_burn (poly1305_context_t *ctx, const byte *m,
+ size_t bytes)
+{
+ unsigned int burn = 0;
+
+ /* handle leftover */
+ if (ctx->leftover)
+ {
+ size_t want = (POLY1305_BLOCKSIZE - ctx->leftover);
+ if (want > bytes)
+ want = bytes;
+ buf_cpy (ctx->buffer + ctx->leftover, m, want);
+ bytes -= want;
+ m += want;
+ ctx->leftover += want;
+ if (ctx->leftover < POLY1305_BLOCKSIZE)
+ return 0;
+ burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 1);
+ ctx->leftover = 0;
+ }
+
+ /* process full blocks */
+ if (bytes >= POLY1305_BLOCKSIZE)
+ {
+ size_t nblks = bytes / POLY1305_BLOCKSIZE;
+ burn = poly1305_blocks (ctx, m, nblks * POLY1305_BLOCKSIZE, 1);
+ m += nblks * POLY1305_BLOCKSIZE;
+ bytes -= nblks * POLY1305_BLOCKSIZE;
+ }
+
+ /* store leftover */
+ if (bytes)
+ {
+ buf_cpy (ctx->buffer + ctx->leftover, m, bytes);
+ ctx->leftover += bytes;
+ }
+
+ return burn;
+}
+
+
+void
+_gcry_poly1305_update (poly1305_context_t *ctx, const byte *m, size_t bytes)
+{
+ unsigned int burn;
+
+ burn = _gcry_poly1305_update_burn (ctx, m, bytes);
+
+ if (burn)
+ _gcry_burn_stack (burn);
+}
+
+
+void
+_gcry_poly1305_finish (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN])
+{
+ unsigned int burn;
+
+ burn = poly1305_final (ctx, mac);
+
+ _gcry_burn_stack (burn);
+}
+
+
+gcry_err_code_t
+_gcry_poly1305_init (poly1305_context_t * ctx, const byte * key,
+ size_t keylen)
+{
+ static int initialized;
+ static const char *selftest_failed;
+
+ if (!initialized)
+ {
+ initialized = 1;
+ selftest_failed = selftest ();
+ if (selftest_failed)
+ log_error ("Poly1305 selftest failed (%s)\n", selftest_failed);
+ }
+
+ if (keylen != POLY1305_KEYLEN)
+ return GPG_ERR_INV_KEYLEN;
+
+ if (selftest_failed)
+ return GPG_ERR_SELFTEST_FAILED;
+
+ poly1305_init (ctx, key);
+
+ return 0;
+}
+
+
+static void
+poly1305_auth (byte mac[POLY1305_TAGLEN], const byte * m, size_t bytes,
+ const byte * key)
+{
+ poly1305_context_t ctx;
+
+ memset (&ctx, 0, sizeof (ctx));
+
+ _gcry_poly1305_init (&ctx, key, POLY1305_KEYLEN);
+ _gcry_poly1305_update (&ctx, m, bytes);
+ _gcry_poly1305_finish (&ctx, mac);
+
+ wipememory (&ctx, sizeof (ctx));
+}
+
+
+static const char *
+selftest (void)
+{
+ /* example from nacl */
+ static const byte nacl_key[POLY1305_KEYLEN] = {
+ 0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91,
+ 0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25,
+ 0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65,
+ 0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80,
+ };
+
+ static const byte nacl_msg[131] = {
+ 0x8e, 0x99, 0x3b, 0x9f, 0x48, 0x68, 0x12, 0x73,
+ 0xc2, 0x96, 0x50, 0xba, 0x32, 0xfc, 0x76, 0xce,
+ 0x48, 0x33, 0x2e, 0xa7, 0x16, 0x4d, 0x96, 0xa4,
+ 0x47, 0x6f, 0xb8, 0xc5, 0x31, 0xa1, 0x18, 0x6a,
+ 0xc0, 0xdf, 0xc1, 0x7c, 0x98, 0xdc, 0xe8, 0x7b,
+ 0x4d, 0xa7, 0xf0, 0x11, 0xec, 0x48, 0xc9, 0x72,
+ 0x71, 0xd2, 0xc2, 0x0f, 0x9b, 0x92, 0x8f, 0xe2,
+ 0x27, 0x0d, 0x6f, 0xb8, 0x63, 0xd5, 0x17, 0x38,
+ 0xb4, 0x8e, 0xee, 0xe3, 0x14, 0xa7, 0xcc, 0x8a,
+ 0xb9, 0x32, 0x16, 0x45, 0x48, 0xe5, 0x26, 0xae,
+ 0x90, 0x22, 0x43, 0x68, 0x51, 0x7a, 0xcf, 0xea,
+ 0xbd, 0x6b, 0xb3, 0x73, 0x2b, 0xc0, 0xe9, 0xda,
+ 0x99, 0x83, 0x2b, 0x61, 0xca, 0x01, 0xb6, 0xde,
+ 0x56, 0x24, 0x4a, 0x9e, 0x88, 0xd5, 0xf9, 0xb3,
+ 0x79, 0x73, 0xf6, 0x22, 0xa4, 0x3d, 0x14, 0xa6,
+ 0x59, 0x9b, 0x1f, 0x65, 0x4c, 0xb4, 0x5a, 0x74,
+ 0xe3, 0x55, 0xa5
+ };
+
+ static const byte nacl_mac[16] = {
+ 0xf3, 0xff, 0xc7, 0x70, 0x3f, 0x94, 0x00, 0xe5,
+ 0x2a, 0x7d, 0xfb, 0x4b, 0x3d, 0x33, 0x05, 0xd9
+ };
+
+  /* generates a final accumulator value of (2^130 - 2), which reduces to 3 mod 2^130-5 */
+ static const byte wrap_key[POLY1305_KEYLEN] = {
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ };
+
+ static const byte wrap_msg[16] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ };
+
+ static const byte wrap_mac[16] = {
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ };
+
+ /* mac of the macs of messages of length 0 to 256, where the key and messages
+ * have all their values set to the length
+ */
+ static const byte total_key[POLY1305_KEYLEN] = {
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ };
+
+ static const byte total_mac[16] = {
+ 0x64, 0xaf, 0xe2, 0xe8, 0xd6, 0xad, 0x7b, 0xbd,
+ 0xd2, 0x87, 0xf9, 0x7c, 0x44, 0x62, 0x3d, 0x39
+ };
+
+ poly1305_context_t ctx;
+ poly1305_context_t total_ctx;
+ byte all_key[POLY1305_KEYLEN];
+ byte all_msg[256];
+ byte mac[16];
+ size_t i, j;
+
+ memset (&ctx, 0, sizeof (ctx));
+ memset (&total_ctx, 0, sizeof (total_ctx));
+
+ memset (mac, 0, sizeof (mac));
+ poly1305_auth (mac, nacl_msg, sizeof (nacl_msg), nacl_key);
+ if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0)
+ return "Poly1305 test 1 failed.";
+
+ /* SSE2/AVX have a 32 byte block size, but also support 64 byte blocks, so
+ * make sure everything still works varying between them */
+ memset (mac, 0, sizeof (mac));
+ _gcry_poly1305_init (&ctx, nacl_key, POLY1305_KEYLEN);
+ _gcry_poly1305_update (&ctx, nacl_msg + 0, 32);
+ _gcry_poly1305_update (&ctx, nacl_msg + 32, 64);
+ _gcry_poly1305_update (&ctx, nacl_msg + 96, 16);
+ _gcry_poly1305_update (&ctx, nacl_msg + 112, 8);
+ _gcry_poly1305_update (&ctx, nacl_msg + 120, 4);
+ _gcry_poly1305_update (&ctx, nacl_msg + 124, 2);
+ _gcry_poly1305_update (&ctx, nacl_msg + 126, 1);
+ _gcry_poly1305_update (&ctx, nacl_msg + 127, 1);
+ _gcry_poly1305_update (&ctx, nacl_msg + 128, 1);
+ _gcry_poly1305_update (&ctx, nacl_msg + 129, 1);
+ _gcry_poly1305_update (&ctx, nacl_msg + 130, 1);
+ _gcry_poly1305_finish (&ctx, mac);
+ if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0)
+ return "Poly1305 test 2 failed.";
+
+ memset (mac, 0, sizeof (mac));
+ poly1305_auth (mac, wrap_msg, sizeof (wrap_msg), wrap_key);
+ if (memcmp (wrap_mac, mac, sizeof (nacl_mac)) != 0)
+ return "Poly1305 test 3 failed.";
+
+ _gcry_poly1305_init (&total_ctx, total_key, POLY1305_KEYLEN);
+ for (i = 0; i < 256; i++)
+ {
+ /* set key and message to 'i,i,i..' */
+ for (j = 0; j < sizeof (all_key); j++)
+ all_key[j] = i;
+ for (j = 0; j < i; j++)
+ all_msg[j] = i;
+ poly1305_auth (mac, all_msg, i, all_key);
+ _gcry_poly1305_update (&total_ctx, mac, 16);
+ }
+ _gcry_poly1305_finish (&total_ctx, mac);
+ if (memcmp (total_mac, mac, sizeof (total_mac)) != 0)
+ return "Poly1305 test 4 failed.";
+
+ return NULL;
+}
diff --git a/comm/third_party/libgcrypt/cipher/primegen.c b/comm/third_party/libgcrypt/cipher/primegen.c
new file mode 100644
index 0000000000..e24de4dc7c
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/primegen.c
@@ -0,0 +1,1878 @@
+/* primegen.c - prime number generator
+ * Copyright (C) 1998, 2000, 2001, 2002, 2003
+ * 2004, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+
+static gcry_mpi_t gen_prime (unsigned int nbits, int secret, int randomlevel,
+ int (*extra_check)(void *, gcry_mpi_t),
+ void *extra_check_arg);
+static int check_prime( gcry_mpi_t prime, gcry_mpi_t val_2, int rm_rounds,
+ gcry_prime_check_func_t cb_func, void *cb_arg );
+static int is_prime (gcry_mpi_t n, int steps, unsigned int *count);
+static void m_out_of_n( char *array, int m, int n );
+
+static void (*progress_cb) (void *,const char*,int,int, int );
+static void *progress_cb_data;
+
+/* Note: 2 is not included because it can be tested more easily by
+ looking at bit 0. The last entry in this list is marked by a zero */
+static ushort small_prime_numbers[] = {
+ 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43,
+ 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101,
+ 103, 107, 109, 113, 127, 131, 137, 139, 149, 151,
+ 157, 163, 167, 173, 179, 181, 191, 193, 197, 199,
+ 211, 223, 227, 229, 233, 239, 241, 251, 257, 263,
+ 269, 271, 277, 281, 283, 293, 307, 311, 313, 317,
+ 331, 337, 347, 349, 353, 359, 367, 373, 379, 383,
+ 389, 397, 401, 409, 419, 421, 431, 433, 439, 443,
+ 449, 457, 461, 463, 467, 479, 487, 491, 499, 503,
+ 509, 521, 523, 541, 547, 557, 563, 569, 571, 577,
+ 587, 593, 599, 601, 607, 613, 617, 619, 631, 641,
+ 643, 647, 653, 659, 661, 673, 677, 683, 691, 701,
+ 709, 719, 727, 733, 739, 743, 751, 757, 761, 769,
+ 773, 787, 797, 809, 811, 821, 823, 827, 829, 839,
+ 853, 857, 859, 863, 877, 881, 883, 887, 907, 911,
+ 919, 929, 937, 941, 947, 953, 967, 971, 977, 983,
+ 991, 997, 1009, 1013, 1019, 1021, 1031, 1033,
+ 1039, 1049, 1051, 1061, 1063, 1069, 1087, 1091,
+ 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151,
+ 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213,
+ 1217, 1223, 1229, 1231, 1237, 1249, 1259, 1277,
+ 1279, 1283, 1289, 1291, 1297, 1301, 1303, 1307,
+ 1319, 1321, 1327, 1361, 1367, 1373, 1381, 1399,
+ 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451,
+ 1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493,
+ 1499, 1511, 1523, 1531, 1543, 1549, 1553, 1559,
+ 1567, 1571, 1579, 1583, 1597, 1601, 1607, 1609,
+ 1613, 1619, 1621, 1627, 1637, 1657, 1663, 1667,
+ 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733,
+ 1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789,
+ 1801, 1811, 1823, 1831, 1847, 1861, 1867, 1871,
+ 1873, 1877, 1879, 1889, 1901, 1907, 1913, 1931,
+ 1933, 1949, 1951, 1973, 1979, 1987, 1993, 1997,
+ 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053,
+ 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111,
+ 2113, 2129, 2131, 2137, 2141, 2143, 2153, 2161,
+ 2179, 2203, 2207, 2213, 2221, 2237, 2239, 2243,
+ 2251, 2267, 2269, 2273, 2281, 2287, 2293, 2297,
+ 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357,
+ 2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411,
+ 2417, 2423, 2437, 2441, 2447, 2459, 2467, 2473,
+ 2477, 2503, 2521, 2531, 2539, 2543, 2549, 2551,
+ 2557, 2579, 2591, 2593, 2609, 2617, 2621, 2633,
+ 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687,
+ 2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729,
+ 2731, 2741, 2749, 2753, 2767, 2777, 2789, 2791,
+ 2797, 2801, 2803, 2819, 2833, 2837, 2843, 2851,
+ 2857, 2861, 2879, 2887, 2897, 2903, 2909, 2917,
+ 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999,
+ 3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061,
+ 3067, 3079, 3083, 3089, 3109, 3119, 3121, 3137,
+ 3163, 3167, 3169, 3181, 3187, 3191, 3203, 3209,
+ 3217, 3221, 3229, 3251, 3253, 3257, 3259, 3271,
+ 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331,
+ 3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391,
+ 3407, 3413, 3433, 3449, 3457, 3461, 3463, 3467,
+ 3469, 3491, 3499, 3511, 3517, 3527, 3529, 3533,
+ 3539, 3541, 3547, 3557, 3559, 3571, 3581, 3583,
+ 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643,
+ 3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709,
+ 3719, 3727, 3733, 3739, 3761, 3767, 3769, 3779,
+ 3793, 3797, 3803, 3821, 3823, 3833, 3847, 3851,
+ 3853, 3863, 3877, 3881, 3889, 3907, 3911, 3917,
+ 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989,
+ 4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049,
+ 4051, 4057, 4073, 4079, 4091, 4093, 4099, 4111,
+ 4127, 4129, 4133, 4139, 4153, 4157, 4159, 4177,
+ 4201, 4211, 4217, 4219, 4229, 4231, 4241, 4243,
+ 4253, 4259, 4261, 4271, 4273, 4283, 4289, 4297,
+ 4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391,
+ 4397, 4409, 4421, 4423, 4441, 4447, 4451, 4457,
+ 4463, 4481, 4483, 4493, 4507, 4513, 4517, 4519,
+ 4523, 4547, 4549, 4561, 4567, 4583, 4591, 4597,
+ 4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657,
+ 4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729,
+ 4733, 4751, 4759, 4783, 4787, 4789, 4793, 4799,
+ 4801, 4813, 4817, 4831, 4861, 4871, 4877, 4889,
+ 4903, 4909, 4919, 4931, 4933, 4937, 4943, 4951,
+ 4957, 4967, 4969, 4973, 4987, 4993, 4999,
+ 0
+};
+static int no_of_small_prime_numbers = DIM (small_prime_numbers) - 1;
+
+
+
+/* An object and a list to build up a global pool of primes. See
+ save_pool_prime and get_pool_prime. */
+struct primepool_s
+{
+ struct primepool_s *next;
+ gcry_mpi_t prime; /* If this is NULL the entry is not used. */
+ unsigned int nbits;
+ gcry_random_level_t randomlevel;
+};
+struct primepool_s *primepool;
+/* Mutex used to protect access to the primepool. */
+GPGRT_LOCK_DEFINE (primepool_lock);
+
+
+gcry_err_code_t
+_gcry_primegen_init (void)
+{
+  /* This function was formerly used to initialize the primepool
+     mutex.  This has been replaced by a static initialization.  */
+ return 0;
+}
+
+
+/* Save PRIME which has been generated at RANDOMLEVEL for later
+   use.  Needs to be called while primepool_lock is held.  Note
+ that PRIME should be considered released after calling this
+ function. */
+static void
+save_pool_prime (gcry_mpi_t prime, gcry_random_level_t randomlevel)
+{
+ struct primepool_s *item, *item2;
+ size_t n;
+
+ for (n=0, item = primepool; item; item = item->next, n++)
+ if (!item->prime)
+ break;
+ if (!item && n > 100)
+ {
+ /* Remove some of the entries. Our strategy is removing
+ the last third from the list. */
+ int i;
+
+ for (i=0, item2 = primepool; item2; item2 = item2->next)
+ {
+ if (i >= n/3*2)
+ {
+ _gcry_mpi_release (item2->prime);
+ item2->prime = NULL;
+ if (!item)
+ item = item2;
+ }
+ }
+ }
+ if (!item)
+ {
+ item = xtrycalloc (1, sizeof *item);
+ if (!item)
+ {
+ /* Out of memory. Silently giving up. */
+ _gcry_mpi_release (prime);
+ return;
+ }
+ item->next = primepool;
+ primepool = item;
+ }
+ item->prime = prime;
+ item->nbits = mpi_get_nbits (prime);
+ item->randomlevel = randomlevel;
+}
+
+
+/* Return a prime from the prime pool or NULL if none has been found.
+   The prime needs to match NBITS and RANDOMLEVEL.  This function needs
+   to be called with primepool_lock held.  */
+static gcry_mpi_t
+get_pool_prime (unsigned int nbits, gcry_random_level_t randomlevel)
+{
+ struct primepool_s *item;
+
+ for (item = primepool; item; item = item->next)
+ if (item->prime
+ && item->nbits == nbits && item->randomlevel == randomlevel)
+ {
+ gcry_mpi_t prime = item->prime;
+ item->prime = NULL;
+ gcry_assert (nbits == mpi_get_nbits (prime));
+ return prime;
+ }
+ return NULL;
+}
+
+
+
+
+
+
+void
+_gcry_register_primegen_progress ( void (*cb)(void *,const char*,int,int,int),
+ void *cb_data )
+{
+ progress_cb = cb;
+ progress_cb_data = cb_data;
+}
+
+
+static void
+progress( int c )
+{
+ if ( progress_cb )
+ progress_cb ( progress_cb_data, "primegen", c, 0, 0 );
+}
+
+
+/****************
+ * Generate a prime number (stored in secure memory)
+ */
+gcry_mpi_t
+_gcry_generate_secret_prime (unsigned int nbits,
+ gcry_random_level_t random_level,
+ int (*extra_check)(void*, gcry_mpi_t),
+ void *extra_check_arg)
+{
+ gcry_mpi_t prime;
+
+ prime = gen_prime (nbits, 1, random_level, extra_check, extra_check_arg);
+ progress('\n');
+ return prime;
+}
+
+
+/* Generate a prime number which may be public, i.e. not allocated in
+ secure memory. */
+gcry_mpi_t
+_gcry_generate_public_prime (unsigned int nbits,
+ gcry_random_level_t random_level,
+ int (*extra_check)(void*, gcry_mpi_t),
+ void *extra_check_arg)
+{
+ gcry_mpi_t prime;
+
+ prime = gen_prime (nbits, 0, random_level, extra_check, extra_check_arg);
+ progress('\n');
+ return prime;
+}
+
+
+/* Core prime generation function. The algorithm used to generate
+   practically safe primes is due to Lim and Lee as described in the
+ CRYPTO '97 proceedings (ISBN3540633847) page 260.
+
+ NEED_Q_FACTOR: If true make sure that at least one factor is of
+ size qbits. This is for example required for DSA.
+   PRIME_GENERATED: Address of a variable where the resulting prime
+ number will be stored.
+ PBITS: Requested size of the prime number. At least 48.
+ QBITS: One factor of the prime needs to be of this size. Maybe 0
+ if this is not required. See also MODE.
+ G: If not NULL an MPI which will receive a generator for the prime
+ for use with Elgamal.
+   RET_FACTORS: if not NULL, an array with all factors is stored at
+      that address.
+ ALL_FACTORS: If set to true all factors of prime-1 are returned.
+   RANDOMLEVEL:  How strong the random numbers should be.
+ FLAGS: Prime generation bit flags. Currently supported:
+ GCRY_PRIME_FLAG_SECRET - The prime needs to be kept secret.
+ CB_FUNC, CB_ARG: Callback to be used for extra checks.
+
+ */
+static gcry_err_code_t
+prime_generate_internal (int need_q_factor,
+ gcry_mpi_t *prime_generated, unsigned int pbits,
+ unsigned int qbits, gcry_mpi_t g,
+ gcry_mpi_t **ret_factors,
+ gcry_random_level_t randomlevel, unsigned int flags,
+ int all_factors,
+ gcry_prime_check_func_t cb_func, void *cb_arg)
+{
+ gcry_err_code_t err = 0;
+ gcry_mpi_t *factors_new = NULL; /* Factors to return to the
+ caller. */
+ gcry_mpi_t *factors = NULL; /* Current factors. */
+ gcry_random_level_t poolrandomlevel; /* Random level used for pool primes. */
+ gcry_mpi_t *pool = NULL; /* Pool of primes. */
+ int *pool_in_use = NULL; /* Array with currently used POOL elements. */
+ unsigned char *perms = NULL; /* Permutations of POOL. */
+ gcry_mpi_t q_factor = NULL; /* Used if QBITS is non-zero. */
+ unsigned int fbits = 0; /* Length of prime factors. */
+ unsigned int n = 0; /* Number of factors. */
+ unsigned int m = 0; /* Number of primes in pool. */
+ gcry_mpi_t q = NULL; /* First prime factor. */
+ gcry_mpi_t prime = NULL; /* Prime candidate. */
+ unsigned int nprime = 0; /* Bits of PRIME. */
+ unsigned int req_qbits; /* The original QBITS value. */
+ gcry_mpi_t val_2; /* For check_prime(). */
+ int is_locked = 0; /* Flag to help unlocking the primepool. */
+ unsigned int is_secret = (flags & GCRY_PRIME_FLAG_SECRET);
+ unsigned int count1 = 0, count2 = 0;
+ unsigned int i = 0, j = 0;
+
+ if (pbits < 48)
+ return GPG_ERR_INV_ARG;
+
+  /* We won't use an overly strong random level for the pooled subprimes. */
+ poolrandomlevel = (randomlevel > GCRY_STRONG_RANDOM?
+ GCRY_STRONG_RANDOM : randomlevel);
+
+
+ /* If QBITS is not given, assume a reasonable value. */
+ if (!qbits)
+ qbits = pbits / 3;
+
+ req_qbits = qbits;
+
+ /* Find number of needed prime factors N. */
+ for (n = 1; (pbits - qbits - 1) / n >= qbits; n++)
+ ;
+ n--;
+
+ val_2 = mpi_alloc_set_ui (2);
+
+ if ((! n) || ((need_q_factor) && (n < 2)))
+ {
+ err = GPG_ERR_INV_ARG;
+ goto leave;
+ }
+
+ if (need_q_factor)
+ {
+ n--; /* Need one factor less because we want a specific Q-FACTOR. */
+ fbits = (pbits - 2 * req_qbits -1) / n;
+ qbits = pbits - req_qbits - n * fbits;
+ }
+ else
+ {
+ fbits = (pbits - req_qbits -1) / n;
+ qbits = pbits - n * fbits;
+ }
+
+ if (DBG_CIPHER)
+ log_debug ("gen prime: pbits=%u qbits=%u fbits=%u/%u n=%d\n",
+ pbits, req_qbits, qbits, fbits, n);
+
+  /* Allocate an integer to hold the new prime. */
+ prime = mpi_new (pbits);
+
+ /* Generate first prime factor. */
+ q = gen_prime (qbits, is_secret, randomlevel, NULL, NULL);
+
+ /* Generate a specific Q-Factor if requested. */
+ if (need_q_factor)
+ q_factor = gen_prime (req_qbits, is_secret, randomlevel, NULL, NULL);
+
+ /* Allocate an array to hold all factors + 2 for later usage. */
+ factors = xtrycalloc (n + 2, sizeof (*factors));
+ if (!factors)
+ {
+ err = gpg_err_code_from_errno (errno);
+ goto leave;
+ }
+
+ /* Allocate an array to track pool usage. */
+ pool_in_use = xtrymalloc (n * sizeof *pool_in_use);
+ if (!pool_in_use)
+ {
+ err = gpg_err_code_from_errno (errno);
+ goto leave;
+ }
+ for (i=0; i < n; i++)
+ pool_in_use[i] = -1;
+
+ /* Make a pool of 3n+5 primes (this is an arbitrary value). We
+     require at least 30 primes for a useful selection process.
+
+ Fixme: We need to research the best formula for sizing the pool.
+ */
+ m = n * 3 + 5;
+ if (need_q_factor) /* Need some more in this case. */
+ m += 5;
+ if (m < 30)
+ m = 30;
+ pool = xtrycalloc (m , sizeof (*pool));
+ if (! pool)
+ {
+ err = gpg_err_code_from_errno (errno);
+ goto leave;
+ }
+
+  /* Permute over the pool of primes until we find a prime of the
+ requested length. */
+ do
+ {
+ next_try:
+ for (i=0; i < n; i++)
+ pool_in_use[i] = -1;
+
+ if (!perms)
+ {
+ /* Allocate new primes. This is done right at the beginning
+ of the loop and if we have later run out of primes. */
+ for (i = 0; i < m; i++)
+ {
+ mpi_free (pool[i]);
+ pool[i] = NULL;
+ }
+
+ /* Init m_out_of_n(). */
+ perms = xtrycalloc (1, m);
+ if (!perms)
+ {
+ err = gpg_err_code_from_errno (errno);
+ goto leave;
+ }
+
+ err = gpgrt_lock_lock (&primepool_lock);
+ if (err)
+ goto leave;
+ is_locked = 1;
+
+ for (i = 0; i < n; i++)
+ {
+ perms[i] = 1;
+ /* At a maximum we use strong random for the factors.
+ This saves us a lot of entropy. Given that Q and
+ possible Q-factor are also used in the final prime
+ this should be acceptable. We also don't allocate in
+                 secure memory to save on that scarce resource too.  If
+ Q has been allocated in secure memory, the final
+ prime will be saved there anyway. This is because
+ our MPI routines take care of that. GnuPG has worked
+ this way ever since. */
+ pool[i] = NULL;
+ if (is_locked)
+ {
+ pool[i] = get_pool_prime (fbits, poolrandomlevel);
+ if (!pool[i])
+ {
+ err = gpgrt_lock_unlock (&primepool_lock);
+ if (err)
+ goto leave;
+ is_locked = 0;
+ }
+ }
+ if (!pool[i])
+ pool[i] = gen_prime (fbits, 0, poolrandomlevel, NULL, NULL);
+ pool_in_use[i] = i;
+ factors[i] = pool[i];
+ }
+
+ if (is_locked && (err = gpgrt_lock_unlock (&primepool_lock)))
+ goto leave;
+ is_locked = 0;
+ }
+ else
+ {
+ /* Get next permutation. */
+ m_out_of_n ( (char*)perms, n, m);
+
+ if ((err = gpgrt_lock_lock (&primepool_lock)))
+ goto leave;
+ is_locked = 1;
+
+ for (i = j = 0; (i < m) && (j < n); i++)
+ if (perms[i])
+ {
+                /* If the subprime has not yet been generated, do it now. */
+ if (!pool[i] && is_locked)
+ {
+ pool[i] = get_pool_prime (fbits, poolrandomlevel);
+ if (!pool[i])
+ {
+ if ((err = gpgrt_lock_unlock (&primepool_lock)))
+ goto leave;
+ is_locked = 0;
+ }
+ }
+ if (!pool[i])
+ pool[i] = gen_prime (fbits, 0, poolrandomlevel, NULL, NULL);
+ pool_in_use[j] = i;
+ factors[j++] = pool[i];
+ }
+
+ if (is_locked && (err = gpgrt_lock_unlock (&primepool_lock)))
+ goto leave;
+ is_locked = 0;
+
+ if (i == n)
+ {
+ /* Ran out of permutations: Allocate new primes. */
+ xfree (perms);
+ perms = NULL;
+ progress ('!');
+ goto next_try;
+ }
+ }
+
+ /* Generate next prime candidate:
+ p = 2 * q [ * q_factor] * factor_0 * factor_1 * ... * factor_n + 1.
+ */
+ mpi_set (prime, q);
+ mpi_mul_ui (prime, prime, 2);
+ if (need_q_factor)
+ mpi_mul (prime, prime, q_factor);
+ for(i = 0; i < n; i++)
+ mpi_mul (prime, prime, factors[i]);
+ mpi_add_ui (prime, prime, 1);
+ nprime = mpi_get_nbits (prime);
+
+ if (nprime < pbits)
+ {
+ if (++count1 > 20)
+ {
+ count1 = 0;
+ qbits++;
+ progress('>');
+ mpi_free (q);
+ q = gen_prime (qbits, is_secret, randomlevel, NULL, NULL);
+ goto next_try;
+ }
+ }
+ else
+ count1 = 0;
+
+ if (nprime > pbits)
+ {
+ if (++count2 > 20)
+ {
+ count2 = 0;
+ qbits--;
+ progress('<');
+ mpi_free (q);
+ q = gen_prime (qbits, is_secret, randomlevel, NULL, NULL);
+ goto next_try;
+ }
+ }
+ else
+ count2 = 0;
+ }
+ while (! ((nprime == pbits) && check_prime (prime, val_2, 5,
+ cb_func, cb_arg)));
+
+ if (DBG_CIPHER)
+ {
+ progress ('\n');
+ log_mpidump ("prime ", prime);
+ log_mpidump ("factor q", q);
+ if (need_q_factor)
+ log_mpidump ("factor q0", q_factor);
+ for (i = 0; i < n; i++)
+ log_mpidump ("factor pi", factors[i]);
+ log_debug ("bit sizes: prime=%u, q=%u",
+ mpi_get_nbits (prime), mpi_get_nbits (q));
+ if (need_q_factor)
+ log_printf (", q0=%u", mpi_get_nbits (q_factor));
+ for (i = 0; i < n; i++)
+ log_printf (", p%d=%u", i, mpi_get_nbits (factors[i]));
+ log_printf ("\n");
+ }
+
+ if (ret_factors)
+ {
+ /* Caller wants the factors. */
+ factors_new = xtrycalloc (n + 4, sizeof (*factors_new));
+ if (! factors_new)
+ {
+ err = gpg_err_code_from_errno (errno);
+ goto leave;
+ }
+
+ if (all_factors)
+ {
+ i = 0;
+ factors_new[i++] = mpi_set_ui (NULL, 2);
+ factors_new[i++] = mpi_copy (q);
+ if (need_q_factor)
+ factors_new[i++] = mpi_copy (q_factor);
+ for(j=0; j < n; j++)
+ factors_new[i++] = mpi_copy (factors[j]);
+ }
+ else
+ {
+ i = 0;
+ if (need_q_factor)
+ {
+ factors_new[i++] = mpi_copy (q_factor);
+ for (; i <= n; i++)
+ factors_new[i] = mpi_copy (factors[i]);
+ }
+ else
+ for (; i < n; i++ )
+ factors_new[i] = mpi_copy (factors[i]);
+ }
+ }
+
+ if (g && need_q_factor)
+ err = GPG_ERR_NOT_IMPLEMENTED;
+ else if (g)
+ {
+ /* Create a generator (start with 3). */
+ gcry_mpi_t tmp = mpi_alloc (mpi_get_nlimbs (prime));
+ gcry_mpi_t b = mpi_alloc (mpi_get_nlimbs (prime));
+ gcry_mpi_t pmin1 = mpi_alloc (mpi_get_nlimbs (prime));
+
+ factors[n] = q;
+ factors[n + 1] = mpi_alloc_set_ui (2);
+ mpi_sub_ui (pmin1, prime, 1);
+ mpi_set_ui (g, 2);
+ do
+ {
+ mpi_add_ui (g, g, 1);
+ if (DBG_CIPHER)
+ log_printmpi ("checking g", g);
+ else
+ progress('^');
+ for (i = 0; i < n + 2; i++)
+ {
+ mpi_fdiv_q (tmp, pmin1, factors[i]);
+ /* No mpi_pow(), but it is okay to use this with mod
+ prime. */
+ mpi_powm (b, g, tmp, prime);
+ if (! mpi_cmp_ui (b, 1))
+ break;
+ }
+ if (DBG_CIPHER)
+ progress('\n');
+ }
+ while (i < n + 2);
+
+ mpi_free (factors[n+1]);
+ mpi_free (tmp);
+ mpi_free (b);
+ mpi_free (pmin1);
+ }
+
+ if (! DBG_CIPHER)
+ progress ('\n');
+
+
+ leave:
+ if (pool)
+ {
+ is_locked = !gpgrt_lock_lock (&primepool_lock);
+ for(i = 0; i < m; i++)
+ {
+ if (pool[i])
+ {
+ for (j=0; j < n; j++)
+ if (pool_in_use[j] == i)
+ break;
+ if (j == n && is_locked)
+ {
+ /* This pooled subprime has not been used. */
+ save_pool_prime (pool[i], poolrandomlevel);
+ }
+ else
+ mpi_free (pool[i]);
+ }
+ }
+ if (is_locked)
+ err = gpgrt_lock_unlock (&primepool_lock);
+ is_locked = 0;
+ xfree (pool);
+ }
+ xfree (pool_in_use);
+ if (factors)
+ xfree (factors); /* Factors are shallow copies. */
+ if (perms)
+ xfree (perms);
+
+ mpi_free (val_2);
+ mpi_free (q);
+ mpi_free (q_factor);
+
+ if (! err)
+ {
+ *prime_generated = prime;
+ if (ret_factors)
+ *ret_factors = factors_new;
+ }
+ else
+ {
+ if (factors_new)
+ {
+ for (i = 0; factors_new[i]; i++)
+ mpi_free (factors_new[i]);
+ xfree (factors_new);
+ }
+ mpi_free (prime);
+ }
+
+ return err;
+}
+
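
prime_generate_internal() above builds its candidates as p = 2 * q * f_1 * ... * f_n + 1 from one leading factor q and n pool primes of fbits bits each, following the Lim-Lee construction cited in its comment.  How the sizes come out for a concrete call (editor's sketch, not part of the patch; pbits = 1024 and QBITS = 0 are hypothetical inputs):

#include <stdio.h>

/* Editor's sketch (not part of the patch): the factor sizing performed at
 * the top of prime_generate_internal(), for pbits = 1024, no QBITS and no
 * Q-factor.  Prints "q=342 bits, 2 factors of 341 bits each". */
int main (void)
{
  unsigned int pbits = 1024, qbits = 0, req_qbits, fbits, n;

  if (!qbits)
    qbits = pbits / 3;                    /* 341 */
  req_qbits = qbits;
  for (n = 1; (pbits - qbits - 1) / n >= qbits; n++)
    ;
  n--;                                    /* n = 2 additional factors */
  fbits = (pbits - req_qbits - 1) / n;    /* 341 */
  qbits = pbits - n * fbits;              /* 342 */
  printf ("q=%u bits, %u factors of %u bits each\n", qbits, n, fbits);
  /* Candidate: p = 2 * q * f_1 * ... * f_n + 1, roughly pbits bits; the
   * main loop nudges qbits up or down until the exact length matches. */
  return 0;
}
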
+
+/* Generate a prime used for discrete logarithm algorithms; i.e. this
+ prime will be public and no strong random is required. On success
+ R_PRIME receives a new MPI with the prime. On error R_PRIME is set
+ to NULL and an error code is returned. If RET_FACTORS is not NULL
+ it is set to an allocated array of factors on success or to NULL on
+ error. */
+gcry_err_code_t
+_gcry_generate_elg_prime (int mode, unsigned pbits, unsigned qbits,
+ gcry_mpi_t g,
+ gcry_mpi_t *r_prime, gcry_mpi_t **ret_factors)
+{
+ *r_prime = NULL;
+ if (ret_factors)
+ *ret_factors = NULL;
+ return prime_generate_internal ((mode == 1), r_prime, pbits, qbits, g,
+ ret_factors, GCRY_WEAK_RANDOM, 0, 0,
+ NULL, NULL);
+}
+
+
+static gcry_mpi_t
+gen_prime (unsigned int nbits, int secret, int randomlevel,
+ int (*extra_check)(void *, gcry_mpi_t), void *extra_check_arg)
+{
+ gcry_mpi_t prime, ptest, pminus1, val_2, val_3, result;
+ int i;
+ unsigned int x, step;
+ unsigned int count1, count2;
+ int *mods;
+
+/* if ( DBG_CIPHER ) */
+/* log_debug ("generate a prime of %u bits ", nbits ); */
+
+ if (nbits < 16)
+ log_fatal ("can't generate a prime with less than %d bits\n", 16);
+
+ mods = (secret? xmalloc_secure (no_of_small_prime_numbers * sizeof *mods)
+ /* */ : xmalloc (no_of_small_prime_numbers * sizeof *mods));
+ /* Make nbits fit into gcry_mpi_t implementation. */
+ val_2 = mpi_alloc_set_ui( 2 );
+ val_3 = mpi_alloc_set_ui( 3);
+ prime = secret? mpi_snew (nbits): mpi_new (nbits);
+ result = mpi_alloc_like( prime );
+ pminus1= mpi_alloc_like( prime );
+ ptest = mpi_alloc_like( prime );
+ count1 = count2 = 0;
+ for (;;)
+    { /* try forever */
+ int dotcount=0;
+
+ /* generate a random number */
+ _gcry_mpi_randomize( prime, nbits, randomlevel );
+
+      /* Set the high order bit and the low order bit to 1.  If we are
+         generating a secret prime we are most probably doing that
+         for RSA; to make sure that the modulus has the
+         requested key size we set the 2 high order bits.  */
+ mpi_set_highbit (prime, nbits-1);
+ if (secret)
+ mpi_set_bit (prime, nbits-2);
+ mpi_set_bit(prime, 0);
+
+ /* Calculate all remainders. */
+ for (i=0; (x = small_prime_numbers[i]); i++ )
+ mods[i] = mpi_fdiv_r_ui(NULL, prime, x);
+
+ /* Now try some primes starting with prime. */
+ for(step=0; step < 20000; step += 2 )
+ {
+ /* Check against all the small primes we have in mods. */
+ count1++;
+ for (i=0; (x = small_prime_numbers[i]); i++ )
+ {
+ while ( mods[i] + step >= x )
+ mods[i] -= x;
+ if ( !(mods[i] + step) )
+ break;
+ }
+ if ( x )
+ continue; /* Found a multiple of an already known prime. */
+
+ mpi_add_ui( ptest, prime, step );
+
+ /* Do a fast Fermat test now. */
+ count2++;
+ mpi_sub_ui( pminus1, ptest, 1);
+ mpi_powm( result, val_2, pminus1, ptest );
+ if ( !mpi_cmp_ui( result, 1 ) )
+ {
+ /* Not composite, perform stronger tests */
+ if (is_prime(ptest, 5, &count2 ))
+ {
+ if (!mpi_test_bit( ptest, nbits-1-secret ))
+ {
+ progress('\n');
+ log_debug ("overflow in prime generation\n");
+ break; /* Stop loop, continue with a new prime. */
+ }
+
+ if (extra_check && extra_check (extra_check_arg, ptest))
+ {
+ /* The extra check told us that this prime is
+ not of the caller's taste. */
+ progress ('/');
+ }
+ else
+ {
+ /* Got it. */
+ mpi_free(val_2);
+ mpi_free(val_3);
+ mpi_free(result);
+ mpi_free(pminus1);
+ mpi_free(prime);
+ xfree(mods);
+ return ptest;
+ }
+ }
+ }
+ if (++dotcount == 10 )
+ {
+ progress('.');
+ dotcount = 0;
+ }
+ }
+ progress(':'); /* restart with a new random value */
+ }
+}
+
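
The stepping loop in gen_prime() avoids any big-number division while scanning candidates: once mods[i] = candidate mod x is known for each small prime x, candidate + step is divisible by x exactly when (mods[i] + step) mod x == 0 (the inner while merely keeps mods[i] + step below x).  A stand-alone illustration (editor's sketch, not part of the patch, with a hypothetical starting value):

#include <stdio.h>

/* Editor's sketch (not part of the patch): incremental trial division as
 * used by gen_prime(); prints "candidate + 22 is divisible by 17". */
int main (void)
{
  unsigned int candidate = 1000003;        /* hypothetical starting value */
  unsigned int x = 17;
  unsigned int mod = candidate % x;        /* computed once, like mods[i] */
  unsigned int step;

  for (step = 0; step < 40; step += 2)
    if ((mod + step) % x == 0)
      printf ("candidate + %u is divisible by %u\n", step, x);
  return 0;
}
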
+/****************
+ * Returns: true if this may be a prime
+ * RM_ROUNDS gives the number of Rabin-Miller tests to run.
+ */
+static int
+check_prime( gcry_mpi_t prime, gcry_mpi_t val_2, int rm_rounds,
+ gcry_prime_check_func_t cb_func, void *cb_arg)
+{
+ int i;
+ unsigned int x;
+ unsigned int count=0;
+
+ /* Check against small primes. */
+ for (i=0; (x = small_prime_numbers[i]); i++ )
+ {
+ if ( mpi_divisible_ui( prime, x ) )
+ return !mpi_cmp_ui (prime, x);
+ }
+
+ /* A quick Fermat test. */
+ {
+ gcry_mpi_t result = mpi_alloc_like( prime );
+ gcry_mpi_t pminus1 = mpi_alloc_like( prime );
+ mpi_sub_ui( pminus1, prime, 1);
+ mpi_powm( result, val_2, pminus1, prime );
+ mpi_free( pminus1 );
+ if ( mpi_cmp_ui( result, 1 ) )
+ {
+ /* Is composite. */
+ mpi_free( result );
+ progress('.');
+ return 0;
+ }
+ mpi_free( result );
+ }
+
+ if (!cb_func || cb_func (cb_arg, GCRY_PRIME_CHECK_AT_MAYBE_PRIME, prime))
+ {
+ /* Perform stronger tests. */
+ if ( is_prime( prime, rm_rounds, &count ) )
+ {
+ if (!cb_func
+ || cb_func (cb_arg, GCRY_PRIME_CHECK_AT_GOT_PRIME, prime))
+ return 1; /* Probably a prime. */
+ }
+ }
+ progress('.');
+ return 0;
+}
+
+
+/*
+ * Return true if n is probably a prime
+ */
+static int
+is_prime (gcry_mpi_t n, int steps, unsigned int *count)
+{
+ gcry_mpi_t x = mpi_alloc( mpi_get_nlimbs( n ) );
+ gcry_mpi_t y = mpi_alloc( mpi_get_nlimbs( n ) );
+ gcry_mpi_t z = mpi_alloc( mpi_get_nlimbs( n ) );
+ gcry_mpi_t nminus1 = mpi_alloc( mpi_get_nlimbs( n ) );
+ gcry_mpi_t a2 = mpi_alloc_set_ui( 2 );
+ gcry_mpi_t q;
+ unsigned i, j, k;
+ int rc = 0;
+ unsigned nbits = mpi_get_nbits( n );
+
+ if (steps < 5) /* Make sure that we do at least 5 rounds. */
+ steps = 5;
+
+ mpi_sub_ui( nminus1, n, 1 );
+
+ /* Find q and k, so that n = 1 + 2^k * q . */
+ q = mpi_copy ( nminus1 );
+ k = mpi_trailing_zeros ( q );
+ mpi_tdiv_q_2exp (q, q, k);
+
+ for (i=0 ; i < steps; i++ )
+ {
+ ++*count;
+ if( !i )
+ {
+ mpi_set_ui( x, 2 );
+ }
+ else
+ {
+ /* We need to loop to avoid an X with value 0 or 1. */
+ do
+ {
+ _gcry_mpi_randomize (x, nbits, GCRY_WEAK_RANDOM);
+
+ /* Make sure that the number is smaller than the prime
+ * and keep the randomness of the high bit. */
+ if (mpi_test_bit (x, nbits-2))
+ {
+ mpi_set_highbit (x, nbits-2); /* Clear all higher bits. */
+ }
+ else
+ {
+ mpi_set_highbit (x, nbits-2);
+ mpi_clear_bit (x, nbits-2);
+ }
+ }
+ while (mpi_cmp_ui (x, 1) <= 0);
+ gcry_assert (mpi_cmp (x, nminus1) < 0);
+ }
+ mpi_powm ( y, x, q, n);
+ if ( mpi_cmp_ui(y, 1) && mpi_cmp( y, nminus1 ) )
+ {
+ for ( j=1; j < k && mpi_cmp( y, nminus1 ); j++ )
+ {
+ mpi_powm(y, y, a2, n);
+ if( !mpi_cmp_ui( y, 1 ) )
+ goto leave; /* Not a prime. */
+ }
+ if (mpi_cmp( y, nminus1 ) )
+ goto leave; /* Not a prime. */
+ }
+ progress('+');
+ }
+ rc = 1; /* May be a prime. */
+
+ leave:
+ mpi_free( x );
+ mpi_free( y );
+ mpi_free( z );
+ mpi_free( nminus1 );
+ mpi_free( q );
+ mpi_free( a2 );
+
+ return rc;
+}
+
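
is_prime() above is the standard Miller-Rabin test on MPIs: write n - 1 = 2^k * q with q odd, compute x^q mod n, and accept the witness if the result is 1 or becomes n - 1 under repeated squaring.  The same round structure on plain 64-bit integers (editor's sketch, not part of the patch; assumes GCC/Clang's unsigned __int128 for the modular products):

#include <stdint.h>
#include <stdio.h>

/* Editor's sketch (not part of the patch): one Miller-Rabin round for a
 * single base, mirroring the structure of is_prime() without MPIs. */
static uint64_t powmod (uint64_t b, uint64_t e, uint64_t m)
{
  uint64_t r = 1;

  b %= m;
  while (e)
    {
      if (e & 1)
        r = (unsigned __int128) r * b % m;
      b = (unsigned __int128) b * b % m;
      e >>= 1;
    }
  return r;
}

static int mr_round (uint64_t n, uint64_t a)
{
  uint64_t q = n - 1;
  unsigned int k = 0, j;
  uint64_t y;

  while (!(q & 1))      /* n - 1 = 2^k * q with q odd */
    { q >>= 1; k++; }
  y = powmod (a, q, n);
  if (y == 1 || y == n - 1)
    return 1;
  for (j = 1; j < k; j++)
    {
      y = (unsigned __int128) y * y % n;
      if (y == n - 1)
        return 1;
    }
  return 0;             /* definitely composite for this base */
}

int main (void)
{
  printf ("561 with base 2: %d\n", mr_round (561, 2));   /* 0: composite */
  printf ("569 with base 2: %d\n", mr_round (569, 2));   /* 1: probable prime */
  return 0;
}
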
+
+/* Given ARRAY of size N with M elements set to true produce a
+   modified array with the next permutation of M elements.  Note that
+   ARRAY is used in a one-bit-per-byte approach.  To detect the last
+   permutation it is useful to initialize the array with the first M
+   elements set to true and use this test:
+ m_out_of_n (array, m, n);
+ for (i = j = 0; i < n && j < m; i++)
+ if (array[i])
+ j++;
+ if (j == m)
+ goto ready;
+
+ This code is based on the algorithm 452 from the "Collected
+ Algorithms From ACM, Volume II" by C. N. Liu and D. T. Tang.
+*/
+static void
+m_out_of_n ( char *array, int m, int n )
+{
+ int i=0, i1=0, j=0, jp=0, j1=0, k1=0, k2=0;
+
+ if( !m || m >= n )
+ return;
+
+ /* Need to handle this simple case separately. */
+ if( m == 1 )
+ {
+ for (i=0; i < n; i++ )
+ {
+ if ( array[i] )
+ {
+ array[i++] = 0;
+ if( i >= n )
+ i = 0;
+ array[i] = 1;
+ return;
+ }
+ }
+ BUG();
+ }
+
+
+ for (j=1; j < n; j++ )
+ {
+ if ( array[n-1] == array[n-j-1])
+ continue;
+ j1 = j;
+ break;
+ }
+
+ if ( (m & 1) )
+ {
+ /* M is odd. */
+ if( array[n-1] )
+ {
+ if( j1 & 1 )
+ {
+ k1 = n - j1;
+ k2 = k1+2;
+ if( k2 > n )
+ k2 = n;
+ goto leave;
+ }
+ goto scan;
+ }
+ k2 = n - j1 - 1;
+ if( k2 == 0 )
+ {
+ k1 = i;
+ k2 = n - j1;
+ }
+ else if( array[k2] && array[k2-1] )
+ k1 = n;
+ else
+ k1 = k2 + 1;
+ }
+ else
+ {
+ /* M is even. */
+ if( !array[n-1] )
+ {
+ k1 = n - j1;
+ k2 = k1 + 1;
+ goto leave;
+ }
+
+ if( !(j1 & 1) )
+ {
+ k1 = n - j1;
+ k2 = k1+2;
+ if( k2 > n )
+ k2 = n;
+ goto leave;
+ }
+ scan:
+ jp = n - j1 - 1;
+ for (i=1; i <= jp; i++ )
+ {
+ i1 = jp + 2 - i;
+ if( array[i1-1] )
+ {
+ if( array[i1-2] )
+ {
+ k1 = i1 - 1;
+ k2 = n - j1;
+ }
+ else
+ {
+ k1 = i1 - 1;
+ k2 = n + 1 - j1;
+ }
+ goto leave;
+ }
+ }
+ k1 = 1;
+ k2 = n + 1 - m;
+ }
+ leave:
+ /* Now complement the two selected bits. */
+ array[k1-1] = !array[k1-1];
+ array[k2-1] = !array[k2-1];
+}
+
+
+/* Generate a new prime number of PRIME_BITS bits and store it in
+ PRIME. If FACTOR_BITS is non-zero, one of the prime factors of
+ (prime - 1) / 2 must be FACTOR_BITS bits long. If FACTORS is
+ non-zero, allocate a new, NULL-terminated array holding the prime
+ factors and store it in FACTORS. FLAGS might be used to influence
+ the prime number generation process. */
+gcry_err_code_t
+_gcry_prime_generate (gcry_mpi_t *prime, unsigned int prime_bits,
+ unsigned int factor_bits, gcry_mpi_t **factors,
+ gcry_prime_check_func_t cb_func, void *cb_arg,
+ gcry_random_level_t random_level,
+ unsigned int flags)
+{
+ gcry_err_code_t rc = 0;
+ gcry_mpi_t *factors_generated = NULL;
+ gcry_mpi_t prime_generated = NULL;
+ unsigned int mode = 0;
+
+ if (!prime)
+ return GPG_ERR_INV_ARG;
+ *prime = NULL;
+
+ if (flags & GCRY_PRIME_FLAG_SPECIAL_FACTOR)
+ mode = 1;
+
+ /* Generate. */
+ rc = prime_generate_internal ((mode==1), &prime_generated, prime_bits,
+ factor_bits, NULL,
+ factors? &factors_generated : NULL,
+ random_level, flags, 1,
+ cb_func, cb_arg);
+
+ if (!rc && cb_func)
+ {
+ /* Additional check. */
+ if ( !cb_func (cb_arg, GCRY_PRIME_CHECK_AT_FINISH, prime_generated))
+ {
+ /* Failed, deallocate resources. */
+ unsigned int i;
+
+ mpi_free (prime_generated);
+ if (factors)
+ {
+ for (i = 0; factors_generated[i]; i++)
+ mpi_free (factors_generated[i]);
+ xfree (factors_generated);
+ }
+ rc = GPG_ERR_GENERAL;
+ }
+ }
+
+ if (!rc)
+ {
+ if (factors)
+ *factors = factors_generated;
+ *prime = prime_generated;
+ }
+
+ return rc;
+}
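+
+/* A minimal usage sketch for this function (the sizes and the use of
+   a special factor are illustrative only; _gcry_prime_group_generator
+   and _gcry_prime_release_factors are defined further below in this
+   file):
+
+     gcry_mpi_t prime, g;
+     gcry_mpi_t *factors;
+
+     if (!_gcry_prime_generate (&prime, 1024, 256, &factors,
+                                NULL, NULL, GCRY_STRONG_RANDOM,
+                                GCRY_PRIME_FLAG_SPECIAL_FACTOR))
+       {
+         if (!_gcry_prime_group_generator (&g, prime, factors, NULL))
+           {
+             ... use PRIME and G ...
+             _gcry_mpi_release (g);
+           }
+         _gcry_prime_release_factors (factors);
+         _gcry_mpi_release (prime);
+       }
+ */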
+
+/* Check whether the number X is prime. */
+gcry_err_code_t
+_gcry_prime_check (gcry_mpi_t x, unsigned int flags)
+{
+ (void)flags;
+
+ switch (mpi_cmp_ui (x, 2))
+ {
+ case 0: return 0; /* 2 is a prime */
+ case -1: return GPG_ERR_NO_PRIME; /* Only numbers > 1 are primes. */
+ }
+
+ /* We use 64 rounds because the prime we are going to test is not
+ guaranteed to be a random one. */
+ if (check_prime (x, mpi_const (MPI_C_TWO), 64, NULL, NULL))
+ return 0;
+
+ return GPG_ERR_NO_PRIME;
+}
+
+
+/* Check whether the number X is prime according to FIPS 186-4 table C.2. */
+gcry_err_code_t
+_gcry_fips186_4_prime_check (gcry_mpi_t x, unsigned int bits)
+{
+ gcry_err_code_t ec = GPG_ERR_NO_ERROR;
+
+ switch (mpi_cmp_ui (x, 2))
+ {
+ case 0: return ec; /* 2 is a prime */
+ case -1: return GPG_ERR_NO_PRIME; /* Only numbers > 1 are primes. */
+ }
+
+ /* We use 5 or 4 rounds as specified in table C.2 */
+ if (! check_prime (x, mpi_const (MPI_C_TWO), bits > 1024 ? 4 : 5, NULL, NULL))
+ ec = GPG_ERR_NO_PRIME;
+
+ return ec;
+}
+
+
+/* Find a generator for PRIME where the factorization of (prime-1) is
+ in the NULL terminated array FACTORS. Return the generator as a
+ newly allocated MPI in R_G. If START_G is not NULL, use this as a
+ start for the search. Returns 0 on success. */
+gcry_err_code_t
+_gcry_prime_group_generator (gcry_mpi_t *r_g,
+ gcry_mpi_t prime, gcry_mpi_t *factors,
+ gcry_mpi_t start_g)
+{
+ gcry_mpi_t tmp, b, pmin1, g;
+ int first, i, n;
+
+ if (!r_g)
+ return GPG_ERR_INV_ARG;
+ *r_g = NULL;
+ if (!factors || !prime)
+ return GPG_ERR_INV_ARG;
+
+ for (n=0; factors[n]; n++)
+ ;
+ if (n < 2)
+ return GPG_ERR_INV_ARG;
+
+ tmp = mpi_new (0);
+ b = mpi_new (0);
+ pmin1 = mpi_new (0);
+ g = start_g? mpi_copy (start_g) : mpi_set_ui (NULL, 3);
+
+ /* Extra sanity check - usually disabled. */
+/* mpi_set (tmp, factors[0]); */
+/* for(i = 1; i < n; i++) */
+/* mpi_mul (tmp, tmp, factors[i]); */
+/* mpi_add_ui (tmp, tmp, 1); */
+/* if (mpi_cmp (prime, tmp)) */
+/* return gpg_error (GPG_ERR_INV_ARG); */
+
+ mpi_sub_ui (pmin1, prime, 1);
+ first = 1;
+ do
+ {
+ if (first)
+ first = 0;
+ else
+ mpi_add_ui (g, g, 1);
+
+ if (DBG_CIPHER)
+ log_printmpi ("checking g", g);
+ else
+ progress('^');
+
+ for (i = 0; i < n; i++)
+ {
+ mpi_fdiv_q (tmp, pmin1, factors[i]);
+ mpi_powm (b, g, tmp, prime);
+ if (! mpi_cmp_ui (b, 1))
+ break;
+ }
+ if (DBG_CIPHER)
+ progress('\n');
+ }
+ while (i < n);
+
+ _gcry_mpi_release (tmp);
+ _gcry_mpi_release (b);
+ _gcry_mpi_release (pmin1);
+ *r_g = g;
+
+ return 0;
+}
+
+/* Convenience function to release the factors array. */
+void
+_gcry_prime_release_factors (gcry_mpi_t *factors)
+{
+ if (factors)
+ {
+ int i;
+
+ for (i=0; factors[i]; i++)
+ mpi_free (factors[i]);
+ xfree (factors);
+ }
+}
+
+
+
+/* Helper for _gcry_derive_x931_prime. */
+static gcry_mpi_t
+find_x931_prime (const gcry_mpi_t pfirst)
+{
+ gcry_mpi_t val_2 = mpi_alloc_set_ui (2);
+ gcry_mpi_t prime;
+
+ prime = mpi_copy (pfirst);
+ /* If P is even add 1. */
+ mpi_set_bit (prime, 0);
+
+ /* We use 64 Rabin-Miller rounds, which is stronger than required and
+ thus sufficient. We do not have a Lucas test implementation, so we
+ can't do it in the X9.31 preferred way of running a few Rabin-Miller
+ rounds followed by one Lucas test. */
+ while ( !check_prime (prime, val_2, 64, NULL, NULL) )
+ mpi_add_ui (prime, prime, 2);
+
+ mpi_free (val_2);
+
+ return prime;
+}
+
+
+/* Generate a prime using the algorithm from X9.31 appendix B.4.
+
+ This function requires that the provided public exponent E is odd.
+ XP, XP1 and XP2 are the seed values. All values are mandatory.
+
+ On success the prime is returned. If R_P1 or R_P2 are given the
+ internal values P1 and P2 are saved at these addresses. On error
+ NULL is returned. */
+gcry_mpi_t
+_gcry_derive_x931_prime (const gcry_mpi_t xp,
+ const gcry_mpi_t xp1, const gcry_mpi_t xp2,
+ const gcry_mpi_t e,
+ gcry_mpi_t *r_p1, gcry_mpi_t *r_p2)
+{
+ gcry_mpi_t p1, p2, p1p2, yp0;
+
+ if (!xp || !xp1 || !xp2)
+ return NULL;
+ if (!e || !mpi_test_bit (e, 0))
+ return NULL; /* We support only odd values for E. */
+
+ p1 = find_x931_prime (xp1);
+ p2 = find_x931_prime (xp2);
+ p1p2 = mpi_alloc_like (xp);
+ mpi_mul (p1p2, p1, p2);
+
+ {
+ gcry_mpi_t r1, tmp;
+
+ /* r1 = (p2^{-1} mod p1)p2 - (p1^{-1} mod p2) */
+ tmp = mpi_alloc_like (p1);
+ mpi_invm (tmp, p2, p1);
+ mpi_mul (tmp, tmp, p2);
+ r1 = tmp;
+
+ tmp = mpi_alloc_like (p2);
+ mpi_invm (tmp, p1, p2);
+ mpi_mul (tmp, tmp, p1);
+ mpi_sub (r1, r1, tmp);
+
+ /* Fixup a negative value. */
+ if (mpi_has_sign (r1))
+ mpi_add (r1, r1, p1p2);
+
+ /* yp0 = xp + (r1 - xp mod p1*p2) */
+ yp0 = tmp; tmp = NULL;
+ mpi_subm (yp0, r1, xp, p1p2);
+ mpi_add (yp0, yp0, xp);
+ mpi_free (r1);
+
+ /* Fixup a negative value. */
+ if (mpi_cmp (yp0, xp) < 0 )
+ mpi_add (yp0, yp0, p1p2);
+ }
+
+ /* yp0 is now the first integer greater than xp with p1 being a
+ large prime factor of yp0-1 and p2 a large prime factor of yp0+1. */
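+
+ /* Why this holds: by construction r1 == 1 (mod p1), since the first
+    term (p2^{-1} mod p1)*p2 is 1 modulo p1 and the second term is a
+    multiple of p1; and r1 == -1 (mod p2), since there the first term
+    vanishes and the subtracted term (p1^{-1} mod p2)*p1 is 1 modulo
+    p2.  Because yp0 == r1 (mod p1*p2), p1 divides yp0-1 and p2
+    divides yp0+1 as stated above. */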
+
+ /* Note that the first example from X9.31 (D.1.1) which uses
+ (Xq1 #1A5CF72EE770DE50CB09ACCEA9#)
+ (Xq2 #134E4CAA16D2350A21D775C404#)
+ (Xq #CC1092495D867E64065DEE3E7955F2EBC7D47A2D
+ 7C9953388F97DDDC3E1CA19C35CA659EDC2FC325
+ 6D29C2627479C086A699A49C4C9CEE7EF7BD1B34
+ 321DE34A#))))
+ returns an yp0 of
+ #CC1092495D867E64065DEE3E7955F2EBC7D47A2D
+ 7C9953388F97DDDC3E1CA19C35CA659EDC2FC4E3
+ BF20CB896EE37E098A906313271422162CB6C642
+ 75C1201F#
+ and not
+ #CC1092495D867E64065DEE3E7955F2EBC7D47A2D
+ 7C9953388F97DDDC3E1CA19C35CA659EDC2FC2E6
+ C88FE299D52D78BE405A97E01FD71DD7819ECB91
+ FA85A076#
+ as stated in the standard. This seems to be a bug in X9.31.
+ */
+
+ {
+ gcry_mpi_t val_2 = mpi_alloc_set_ui (2);
+ gcry_mpi_t gcdtmp = mpi_alloc_like (yp0);
+ int gcdres;
+
+ mpi_sub_ui (p1p2, p1p2, 1); /* Adjust for loop body. */
+ mpi_sub_ui (yp0, yp0, 1); /* Ditto. */
+ for (;;)
+ {
+ gcdres = mpi_gcd (gcdtmp, e, yp0);
+ mpi_add_ui (yp0, yp0, 1);
+ if (!gcdres)
+ progress ('/'); /* gcd (e, yp0-1) != 1 */
+ else if (check_prime (yp0, val_2, 64, NULL, NULL))
+ break; /* Found. */
+ /* We add p1p2-1 because yp0 is incremented after the gcd test. */
+ mpi_add (yp0, yp0, p1p2);
+ }
+ mpi_free (gcdtmp);
+ mpi_free (val_2);
+ }
+
+ mpi_free (p1p2);
+
+ progress('\n');
+ if (r_p1)
+ *r_p1 = p1;
+ else
+ mpi_free (p1);
+ if (r_p2)
+ *r_p2 = p2;
+ else
+ mpi_free (p2);
+ return yp0;
+}
+
+
+
+/* Generate the two primes used for DSA using the algorithm specified
+ in FIPS 186-2. PBITS is the desired length of the prime P and QBITS
+ the length of the prime Q. If SEED is not supplied and SEEDLEN is 0
+ the function generates an appropriate SEED. On success the generated
+ primes are stored at R_Q and R_P, the counter value is stored at
+ R_COUNTER and the seed actually used for generation is stored at
+ R_SEED and R_SEEDLEN. */
+gpg_err_code_t
+_gcry_generate_fips186_2_prime (unsigned int pbits, unsigned int qbits,
+ const void *seed, size_t seedlen,
+ gcry_mpi_t *r_q, gcry_mpi_t *r_p,
+ int *r_counter,
+ void **r_seed, size_t *r_seedlen)
+{
+ gpg_err_code_t ec;
+ unsigned char seed_help_buffer[160/8]; /* Used to hold a generated SEED. */
+ unsigned char *seed_plus; /* Malloced buffer to hold SEED+x. */
+ unsigned char digest[160/8]; /* Helper buffer for SHA-1 digest. */
+ gcry_mpi_t val_2 = NULL; /* Helper for the prime test. */
+ gcry_mpi_t tmpval = NULL; /* Helper variable. */
+ int i;
+
+ unsigned char value_u[160/8];
+ int value_n, value_b, value_k;
+ int counter;
+ gcry_mpi_t value_w = NULL;
+ gcry_mpi_t value_x = NULL;
+ gcry_mpi_t prime_q = NULL;
+ gcry_mpi_t prime_p = NULL;
+
+ /* FIPS 186-2 allows only for 1024/160 bit. */
+ if (pbits != 1024 || qbits != 160)
+ return GPG_ERR_INV_KEYLEN;
+
+ if (!seed && !seedlen)
+ ; /* No seed value given: We are asked to generate it. */
+ else if (!seed || seedlen < qbits/8)
+ return GPG_ERR_INV_ARG;
+
+ /* Allocate a buffer to later compute SEED+some_increment. */
+ seed_plus = xtrymalloc (seedlen < 20? 20:seedlen);
+ if (!seed_plus)
+ {
+ ec = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+
+ val_2 = mpi_alloc_set_ui (2);
+ value_n = (pbits - 1) / qbits;
+ value_b = (pbits - 1) - value_n * qbits;
+ value_w = mpi_new (pbits);
+ value_x = mpi_new (pbits);
+
+ restart:
+ /* Generate Q. */
+ for (;;)
+ {
+ /* Step 1: Generate a (new) seed unless one has been supplied. */
+ if (!seed)
+ {
+ seedlen = sizeof seed_help_buffer;
+ _gcry_create_nonce (seed_help_buffer, seedlen);
+ seed = seed_help_buffer;
+ }
+
+ /* Step 2: U = sha1(seed) ^ sha1((seed+1) mod 2^{qbits}) */
+ memcpy (seed_plus, seed, seedlen);
+ for (i=seedlen-1; i >= 0; i--)
+ {
+ seed_plus[i]++;
+ if (seed_plus[i])
+ break;
+ }
+ _gcry_md_hash_buffer (GCRY_MD_SHA1, value_u, seed, seedlen);
+ _gcry_md_hash_buffer (GCRY_MD_SHA1, digest, seed_plus, seedlen);
+ for (i=0; i < sizeof value_u; i++)
+ value_u[i] ^= digest[i];
+
+ /* Step 3: Form q from U */
+ _gcry_mpi_release (prime_q); prime_q = NULL;
+ ec = _gcry_mpi_scan (&prime_q, GCRYMPI_FMT_USG,
+ value_u, sizeof value_u, NULL);
+ if (ec)
+ goto leave;
+ mpi_set_highbit (prime_q, qbits-1 );
+ mpi_set_bit (prime_q, 0);
+
+ /* Step 4: Test whether Q is prime using 64 rounds of Rabin-Miller. */
+ if (check_prime (prime_q, val_2, 64, NULL, NULL))
+ break; /* Yes, Q is prime. */
+
+ /* Step 5. */
+ seed = NULL; /* Force a new seed at Step 1. */
+ }
+
+ /* Step 6. Note that we do not use an explicit offset but increment
+ SEED_PLUS accordingly. SEED_PLUS is currently SEED+1. */
+ counter = 0;
+
+ /* Generate P. */
+ prime_p = mpi_new (pbits);
+ for (;;)
+ {
+ /* Step 7: For k = 0,...n let
+ V_k = sha1(seed+offset+k) mod 2^{qbits}
+ Step 8: W = V_0 + V_1*2^160 +
+ ...
+ + V_{n-1}*2^{(n-1)*160}
+ + (V_{n} mod 2^b)*2^{n*160}
+ */
+ mpi_set_ui (value_w, 0);
+ for (value_k=0; value_k <= value_n; value_k++)
+ {
+ /* There is no need to have an explicit offset variable: In the
+ first round we shall have an offset of 2; this is achieved by
+ using SEED_PLUS, which is already at SEED+1, so we just need to
+ increment it once again. The requirement for the next round is
+ to update the offset by N, which we implicitly did at the end
+ of this loop, and then to add one; this one is the same as in
+ the first round. */
+ for (i=seedlen-1; i >= 0; i--)
+ {
+ seed_plus[i]++;
+ if (seed_plus[i])
+ break;
+ }
+ _gcry_md_hash_buffer (GCRY_MD_SHA1, digest, seed_plus, seedlen);
+
+ _gcry_mpi_release (tmpval); tmpval = NULL;
+ ec = _gcry_mpi_scan (&tmpval, GCRYMPI_FMT_USG,
+ digest, sizeof digest, NULL);
+ if (ec)
+ goto leave;
+ if (value_k == value_n)
+ mpi_clear_highbit (tmpval, value_b); /* (V_n mod 2^b) */
+ mpi_lshift (tmpval, tmpval, value_k*qbits);
+ mpi_add (value_w, value_w, tmpval);
+ }
+
+ /* Step 8 continued: X = W + 2^{L-1} */
+ mpi_set_ui (value_x, 0);
+ mpi_set_highbit (value_x, pbits-1);
+ mpi_add (value_x, value_x, value_w);
+
+ /* Step 9: c = X mod 2q, p = X - (c - 1) */
+ mpi_mul_2exp (tmpval, prime_q, 1);
+ mpi_mod (tmpval, value_x, tmpval);
+ mpi_sub_ui (tmpval, tmpval, 1);
+ mpi_sub (prime_p, value_x, tmpval);
+
+ /* Step 10: If p < 2^{L-1} skip the primality test. */
+ /* Step 11 and 12: Primality test. */
+ if (mpi_get_nbits (prime_p) >= pbits-1
+ && check_prime (prime_p, val_2, 64, NULL, NULL) )
+ break; /* Yes, P is prime, continue with Step 15. */
+
+ /* Step 13: counter = counter + 1, offset = offset + n + 1. */
+ counter++;
+
+ /* Step 14: If counter >= 2^12 goto Step 1. */
+ if (counter >= 4096)
+ goto restart;
+ }
+
+ /* Step 15: Save p, q, counter and seed. */
+/* log_debug ("fips186-2 pbits p=%u q=%u counter=%d\n", */
+/* mpi_get_nbits (prime_p), mpi_get_nbits (prime_q), counter); */
+/* log_printhex("fips186-2 seed:", seed, seedlen); */
+/* log_mpidump ("fips186-2 prime p", prime_p); */
+/* log_mpidump ("fips186-2 prime q", prime_q); */
+ if (r_q)
+ {
+ *r_q = prime_q;
+ prime_q = NULL;
+ }
+ if (r_p)
+ {
+ *r_p = prime_p;
+ prime_p = NULL;
+ }
+ if (r_counter)
+ *r_counter = counter;
+ if (r_seed && r_seedlen)
+ {
+ memcpy (seed_plus, seed, seedlen);
+ *r_seed = seed_plus;
+ seed_plus = NULL;
+ *r_seedlen = seedlen;
+ }
+
+
+ leave:
+ _gcry_mpi_release (tmpval);
+ _gcry_mpi_release (value_x);
+ _gcry_mpi_release (value_w);
+ _gcry_mpi_release (prime_p);
+ _gcry_mpi_release (prime_q);
+ xfree (seed_plus);
+ _gcry_mpi_release (val_2);
+ return ec;
+}
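+
+/* A minimal usage sketch (the error handling is illustrative only;
+   1024/160 are the only sizes accepted by this function):
+
+     gcry_mpi_t p, q;
+     int counter;
+     void *seed;
+     size_t seedlen;
+
+     if (!_gcry_generate_fips186_2_prime (1024, 160, NULL, 0,
+                                          &q, &p, &counter,
+                                          &seed, &seedlen))
+       {
+         ... use P and Q; SEED and COUNTER allow the generation to be
+             verified later ...
+         _gcry_mpi_release (p);
+         _gcry_mpi_release (q);
+         xfree (seed);
+       }
+ */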
+
+
+
+/* WARNING: The code below has not yet been tested!
+ *
+ * Generate the two primes used for DSA using the algorithm specified
+ * in FIPS 186-3, A.1.1.2. PBITS is the desired length of the prime P
+ * and QBITS the length of the prime Q. If SEED is not supplied and
+ * SEEDLEN is 0 the function generates an appropriate SEED. On
+ * success the generated primes are stored at R_Q and R_P, the counter
+ * value is stored at R_COUNTER and the seed actually used for
+ * generation is stored at R_SEED and R_SEEDLEN. The hash algorithm
+ * used is stored at R_HASHALGO.
+ *
+ * Note that this function is very similar to the fips186_2 code. Due
+ * to the minor differences, the different buffer sizes and for
+ * documentation, we use a separate function.
+ */
+gpg_err_code_t
+_gcry_generate_fips186_3_prime (unsigned int pbits, unsigned int qbits,
+ const void *seed, size_t seedlen,
+ gcry_mpi_t *r_q, gcry_mpi_t *r_p,
+ int *r_counter,
+ void **r_seed, size_t *r_seedlen,
+ int *r_hashalgo)
+{
+ gpg_err_code_t ec;
+ unsigned char seed_help_buffer[256/8]; /* Used to hold a generated SEED. */
+ unsigned char *seed_plus; /* Malloced buffer to hold SEED+x. */
+ unsigned char digest[256/8]; /* Helper buffer for SHA-2 digest. */
+ gcry_mpi_t val_2 = NULL; /* Helper for the prime test. */
+ gcry_mpi_t tmpval = NULL; /* Helper variable. */
+ int hashalgo; /* The id of the Approved Hash Function. */
+ int i;
+
+ unsigned char value_u[256/8];
+ int value_n, value_b, value_j;
+ int counter;
+ gcry_mpi_t value_w = NULL;
+ gcry_mpi_t value_x = NULL;
+ gcry_mpi_t prime_q = NULL;
+ gcry_mpi_t prime_p = NULL;
+
+ gcry_assert (sizeof seed_help_buffer == sizeof digest
+ && sizeof seed_help_buffer == sizeof value_u);
+
+ /* Step 1: Check the requested prime lengths. */
+ /* Note that due to the size of our buffers QBITS is limited to 256. */
+ if (pbits == 2048 && qbits == 224)
+ hashalgo = GCRY_MD_SHA224;
+ else if (pbits == 2048 && qbits == 256)
+ hashalgo = GCRY_MD_SHA256;
+ else if (pbits == 3072 && qbits == 256)
+ hashalgo = GCRY_MD_SHA256;
+ else
+ return GPG_ERR_INV_KEYLEN;
+
+ /* Also check that the hash algorithm is available. */
+ ec = _gcry_md_test_algo (hashalgo);
+ if (ec)
+ return ec;
+ gcry_assert (qbits/8 <= sizeof digest);
+ gcry_assert (_gcry_md_get_algo_dlen (hashalgo) == qbits/8);
+
+
+ /* Step 2: Check seedlen. */
+ if (!seed && !seedlen)
+ ; /* No seed value given: We are asked to generate it. */
+ else if (!seed || seedlen < qbits/8)
+ return GPG_ERR_INV_ARG;
+
+ /* Allocate a buffer to later compute SEED+some_increment and a few
+ helper variables. */
+ seed_plus = xtrymalloc (seedlen < sizeof seed_help_buffer?
+ sizeof seed_help_buffer : seedlen);
+ if (!seed_plus)
+ {
+ ec = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+ val_2 = mpi_alloc_set_ui (2);
+ value_w = mpi_new (pbits);
+ value_x = mpi_new (pbits);
+
+ /* Step 3: n = \lceil L / outlen \rceil - 1 */
+ value_n = (pbits + qbits - 1) / qbits - 1;
+ /* Step 4: b = L - 1 - (n * outlen) */
+ value_b = pbits - 1 - (value_n * qbits);
+
+ restart:
+ /* Generate Q. */
+ for (;;)
+ {
+ /* Step 5: Generate a (new) seed unless one has been supplied. */
+ if (!seed)
+ {
+ seedlen = qbits/8;
+ gcry_assert (seedlen <= sizeof seed_help_buffer);
+ _gcry_create_nonce (seed_help_buffer, seedlen);
+ seed = seed_help_buffer;
+ }
+
+ /* Step 6: U = hash(seed) */
+ _gcry_md_hash_buffer (hashalgo, value_u, seed, seedlen);
+
+ /* Step 7: q = 2^{N-1} + U + 1 - (U mod 2) */
+ if ( !(value_u[qbits/8-1] & 0x01) )
+ {
+ for (i=qbits/8-1; i >= 0; i--)
+ {
+ value_u[i]++;
+ if (value_u[i])
+ break;
+ }
+ }
+ _gcry_mpi_release (prime_q); prime_q = NULL;
+ ec = _gcry_mpi_scan (&prime_q, GCRYMPI_FMT_USG,
+ value_u, qbits/8, NULL);
+ if (ec)
+ goto leave;
+ mpi_set_highbit (prime_q, qbits-1 );
+
+ /* Step 8: Test whether Q is prime using 64 rounds of Rabin-Miller.
+ According to table C.1 this is sufficient for all
+ supported prime sizes (i.e. up to 3072/256). */
+ if (check_prime (prime_q, val_2, 64, NULL, NULL))
+ break; /* Yes, Q is prime. */
+
+ /* Step 9. */
+ seed = NULL; /* Force a new seed at Step 5. */
+ }
+
+ /* Step 11. Note that we do not use an explicit offset but increment
+ SEED_PLUS accordingly. */
+ memcpy (seed_plus, seed, seedlen);
+ counter = 0;
+
+ /* Generate P. */
+ prime_p = mpi_new (pbits);
+ for (;;)
+ {
+ /* Step 11.1: For j = 0,...n let
+ V_j = hash(seed+offset+j)
+ Step 11.2: W = V_0 + V_1*2^outlen +
+ ...
+ + V_{n-1}*2^{(n-1)*outlen}
+ + (V_{n} mod 2^b)*2^{n*outlen}
+ */
+ mpi_set_ui (value_w, 0);
+ for (value_j=0; value_j <= value_n; value_j++)
+ {
+ /* There is no need to have an explicit offset variable: In
+ the first round we shall have an offset of 1 and a j of
+ 0. This is achieved by incrementing SEED_PLUS here. For
+ the next round offset is implicitly updated by using
+ SEED_PLUS again. */
+ for (i=seedlen-1; i >= 0; i--)
+ {
+ seed_plus[i]++;
+ if (seed_plus[i])
+ break;
+ }
+ _gcry_md_hash_buffer (hashalgo, digest, seed_plus, seedlen);
+
+ _gcry_mpi_release (tmpval); tmpval = NULL;
+ ec = _gcry_mpi_scan (&tmpval, GCRYMPI_FMT_USG,
+ digest, qbits/8, NULL);
+ if (ec)
+ goto leave;
+ if (value_j == value_n)
+ mpi_clear_highbit (tmpval, value_b); /* (V_n mod 2^b) */
+ mpi_lshift (tmpval, tmpval, value_j*qbits);
+ mpi_add (value_w, value_w, tmpval);
+ }
+
+ /* Step 11.3: X = W + 2^{L-1} */
+ mpi_set_ui (value_x, 0);
+ mpi_set_highbit (value_x, pbits-1);
+ mpi_add (value_x, value_x, value_w);
+
+ /* Step 11.4: c = X mod 2q */
+ mpi_mul_2exp (tmpval, prime_q, 1);
+ mpi_mod (tmpval, value_x, tmpval);
+
+ /* Step 11.5: p = X - (c - 1) */
+ mpi_sub_ui (tmpval, tmpval, 1);
+ mpi_sub (prime_p, value_x, tmpval);
+
+ /* Step 11.6: If p < 2^{L-1} skip the primality test. */
+ /* Step 11.7 and 11.8: Primality test. */
+ if (mpi_get_nbits (prime_p) >= pbits-1
+ && check_prime (prime_p, val_2, 64, NULL, NULL) )
+ break; /* Yes, P is prime, continue with Step 15. */
+
+ /* Step 11.9: counter = counter + 1, offset = offset + n + 1.
+ If counter >= 4L goto Step 5. */
+ counter++;
+ if (counter >= 4*pbits)
+ goto restart;
+ }
+
+ /* Step 12: Save p, q, counter and seed. */
+ /* log_debug ("fips186-3 pbits p=%u q=%u counter=%d\n", */
+ /* mpi_get_nbits (prime_p), mpi_get_nbits (prime_q), counter); */
+ /* log_printhex ("fips186-3 seed", seed, seedlen); */
+ /* log_printmpi ("fips186-3 p", prime_p); */
+ /* log_printmpi ("fips186-3 q", prime_q); */
+
+ if (r_q)
+ {
+ *r_q = prime_q;
+ prime_q = NULL;
+ }
+ if (r_p)
+ {
+ *r_p = prime_p;
+ prime_p = NULL;
+ }
+ if (r_counter)
+ *r_counter = counter;
+ if (r_seed && r_seedlen)
+ {
+ memcpy (seed_plus, seed, seedlen);
+ *r_seed = seed_plus;
+ seed_plus = NULL;
+ *r_seedlen = seedlen;
+ }
+ if (r_hashalgo)
+ *r_hashalgo = hashalgo;
+
+ leave:
+ _gcry_mpi_release (tmpval);
+ _gcry_mpi_release (value_x);
+ _gcry_mpi_release (value_w);
+ _gcry_mpi_release (prime_p);
+ _gcry_mpi_release (prime_q);
+ xfree (seed_plus);
+ _gcry_mpi_release (val_2);
+ return ec;
+}
diff --git a/comm/third_party/libgcrypt/cipher/pubkey-internal.h b/comm/third_party/libgcrypt/cipher/pubkey-internal.h
new file mode 100644
index 0000000000..d31e26f392
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/pubkey-internal.h
@@ -0,0 +1,105 @@
+/* pubkey-internal.h - Internal defs for pubkey.c
+ * Copyright (C) 2013 g10 code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_PUBKEY_INTERNAL_H
+#define GCRY_PUBKEY_INTERNAL_H
+
+/*-- pubkey-util.c --*/
+gpg_err_code_t _gcry_pk_util_parse_flaglist (gcry_sexp_t list,
+ int *r_flags,
+ enum pk_encoding *r_encoding);
+gpg_err_code_t _gcry_pk_util_get_nbits (gcry_sexp_t list,
+ unsigned int *r_nbits);
+gpg_err_code_t _gcry_pk_util_get_rsa_use_e (gcry_sexp_t list,
+ unsigned long *r_e);
+gpg_err_code_t _gcry_pk_util_preparse_sigval (gcry_sexp_t s_sig,
+ const char **algo_names,
+ gcry_sexp_t *r_parms,
+ int *r_eccflags);
+gpg_err_code_t _gcry_pk_util_preparse_encval (gcry_sexp_t sexp,
+ const char **algo_names,
+ gcry_sexp_t *r_parms,
+ struct pk_encoding_ctx *ctx);
+void _gcry_pk_util_init_encoding_ctx (struct pk_encoding_ctx *ctx,
+ enum pk_operation op,
+ unsigned int nbits);
+void _gcry_pk_util_free_encoding_ctx (struct pk_encoding_ctx *ctx);
+gcry_err_code_t _gcry_pk_util_data_to_mpi (gcry_sexp_t input,
+ gcry_mpi_t *ret_mpi,
+ struct pk_encoding_ctx *ctx);
+
+
+
+/*-- rsa-common.c --*/
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_for_enc (gcry_mpi_t *r_result, unsigned int nbits,
+ const unsigned char *value, size_t valuelen,
+ const unsigned char *random_override,
+ size_t random_override_len);
+gpg_err_code_t
+_gcry_rsa_pkcs1_decode_for_enc (unsigned char **r_result, size_t *r_resultlen,
+ unsigned int nbits, gcry_mpi_t value);
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_raw_for_sig (gcry_mpi_t *r_result, unsigned int nbits,
+ const unsigned char *value, size_t valuelen);
+
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_for_sig (gcry_mpi_t *r_result, unsigned int nbits,
+ const unsigned char *value, size_t valuelen,
+ int algo);
+gpg_err_code_t
+_gcry_rsa_oaep_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo,
+ const unsigned char *value, size_t valuelen,
+ const unsigned char *label, size_t labellen,
+ const void *random_override, size_t random_override_len);
+gpg_err_code_t
+_gcry_rsa_oaep_decode (unsigned char **r_result, size_t *r_resultlen,
+ unsigned int nbits, int algo,
+ gcry_mpi_t value,
+ const unsigned char *label, size_t labellen);
+gpg_err_code_t
+_gcry_rsa_pss_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo,
+ const unsigned char *value, size_t valuelen, int saltlen,
+ const void *random_override, size_t random_override_len);
+gpg_err_code_t
+_gcry_rsa_pss_verify (gcry_mpi_t value, gcry_mpi_t encoded,
+ unsigned int nbits, int algo, size_t saltlen);
+
+
+
+/*-- dsa-common.c --*/
+void _gcry_dsa_modify_k (gcry_mpi_t k, gcry_mpi_t q, int qbits);
+gcry_mpi_t _gcry_dsa_gen_k (gcry_mpi_t q, int security_level);
+gpg_err_code_t _gcry_dsa_gen_rfc6979_k (gcry_mpi_t *r_k,
+ gcry_mpi_t dsa_q, gcry_mpi_t dsa_x,
+ const unsigned char *h1,
+ unsigned int h1len,
+ int halgo,
+ unsigned int extraloops);
+
+gpg_err_code_t _gcry_dsa_normalize_hash (gcry_mpi_t input,
+ gcry_mpi_t *out,
+ unsigned int qbits);
+
+/*-- ecc.c --*/
+gpg_err_code_t _gcry_pk_ecc_get_sexp (gcry_sexp_t *r_sexp, int mode,
+ mpi_ec_t ec);
+
+
+#endif /*GCRY_PUBKEY_INTERNAL_H*/
diff --git a/comm/third_party/libgcrypt/cipher/pubkey-util.c b/comm/third_party/libgcrypt/cipher/pubkey-util.c
new file mode 100644
index 0000000000..7ddef7dc31
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/pubkey-util.c
@@ -0,0 +1,1160 @@
+/* pubkey-util.c - Supporting functions for all pubkey modules.
+ * Copyright (C) 1998, 1999, 2000, 2002, 2003, 2005,
+ * 2007, 2008, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013, 2015 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+/* Callback for the pubkey algorithm code to verify PSS signatures.
+ OPAQUE is the data provided by the actual caller. The meaning of
+ TMP depends on the actual algorithm (but there is only RSA); now
+ for RSA it is the output of running the public key function on the
+ input. */
+static int
+pss_verify_cmp (void *opaque, gcry_mpi_t tmp)
+{
+ struct pk_encoding_ctx *ctx = opaque;
+ gcry_mpi_t hash = ctx->verify_arg;
+
+ return _gcry_rsa_pss_verify (hash, tmp, ctx->nbits - 1,
+ ctx->hash_algo, ctx->saltlen);
+}
+
+
+/* Parser for a flag list. On return the encoding is stored at
+ R_ENCODING and the flags are stored at R_FLAGS. If any of them is
+ not needed, NULL may be passed. The function returns 0 on success
+ or an error code. */
+gpg_err_code_t
+_gcry_pk_util_parse_flaglist (gcry_sexp_t list,
+ int *r_flags, enum pk_encoding *r_encoding)
+{
+ gpg_err_code_t rc = 0;
+ const char *s;
+ size_t n;
+ int i;
+ int encoding = PUBKEY_ENC_UNKNOWN;
+ int flags = 0;
+ int igninvflag = 0;
+
+ for (i = list ? sexp_length (list)-1 : 0; i > 0; i--)
+ {
+ s = sexp_nth_data (list, i, &n);
+ if (!s)
+ continue; /* Not a data element. */
+
+ switch (n)
+ {
+ case 3:
+ if (!memcmp (s, "pss", 3) && encoding == PUBKEY_ENC_UNKNOWN)
+ {
+ encoding = PUBKEY_ENC_PSS;
+ flags |= PUBKEY_FLAG_FIXEDLEN;
+ }
+ else if (!memcmp (s, "raw", 3) && encoding == PUBKEY_ENC_UNKNOWN)
+ {
+ encoding = PUBKEY_ENC_RAW;
+ flags |= PUBKEY_FLAG_RAW_FLAG; /* Explicitly given. */
+ }
+ else if (!memcmp (s, "sm2", 3))
+ {
+ encoding = PUBKEY_ENC_RAW;
+ flags |= PUBKEY_FLAG_SM2 | PUBKEY_FLAG_RAW_FLAG;
+ }
+ else if (!igninvflag)
+ rc = GPG_ERR_INV_FLAG;
+ break;
+
+ case 4:
+ if (!memcmp (s, "comp", 4))
+ flags |= PUBKEY_FLAG_COMP;
+ else if (!memcmp (s, "oaep", 4) && encoding == PUBKEY_ENC_UNKNOWN)
+ {
+ encoding = PUBKEY_ENC_OAEP;
+ flags |= PUBKEY_FLAG_FIXEDLEN;
+ }
+ else if (!memcmp (s, "gost", 4))
+ {
+ encoding = PUBKEY_ENC_RAW;
+ flags |= PUBKEY_FLAG_GOST;
+ }
+ else if (!igninvflag)
+ rc = GPG_ERR_INV_FLAG;
+ break;
+
+ case 5:
+ if (!memcmp (s, "eddsa", 5))
+ {
+ encoding = PUBKEY_ENC_RAW;
+ flags |= PUBKEY_FLAG_EDDSA;
+ flags |= PUBKEY_FLAG_DJB_TWEAK;
+ }
+ else if (!memcmp (s, "pkcs1", 5) && encoding == PUBKEY_ENC_UNKNOWN)
+ {
+ encoding = PUBKEY_ENC_PKCS1;
+ flags |= PUBKEY_FLAG_FIXEDLEN;
+ }
+ else if (!memcmp (s, "param", 5))
+ flags |= PUBKEY_FLAG_PARAM;
+ else if (!igninvflag)
+ rc = GPG_ERR_INV_FLAG;
+ break;
+
+ case 6:
+ if (!memcmp (s, "nocomp", 6))
+ flags |= PUBKEY_FLAG_NOCOMP;
+ else if (!igninvflag)
+ rc = GPG_ERR_INV_FLAG;
+ break;
+
+ case 7:
+ if (!memcmp (s, "rfc6979", 7))
+ flags |= PUBKEY_FLAG_RFC6979;
+ else if (!memcmp (s, "noparam", 7))
+ ; /* Ignore - it is the default. */
+ else if (!memcmp (s, "prehash", 7))
+ flags |= PUBKEY_FLAG_PREHASH;
+ else if (!igninvflag)
+ rc = GPG_ERR_INV_FLAG;
+ break;
+
+ case 8:
+ if (!memcmp (s, "use-x931", 8))
+ flags |= PUBKEY_FLAG_USE_X931;
+ else if (!igninvflag)
+ rc = GPG_ERR_INV_FLAG;
+ break;
+
+ case 9:
+ if (!memcmp (s, "pkcs1-raw", 9) && encoding == PUBKEY_ENC_UNKNOWN)
+ {
+ encoding = PUBKEY_ENC_PKCS1_RAW;
+ flags |= PUBKEY_FLAG_FIXEDLEN;
+ }
+ else if (!memcmp (s, "djb-tweak", 9))
+ {
+ encoding = PUBKEY_ENC_RAW;
+ flags |= PUBKEY_FLAG_DJB_TWEAK;
+ }
+ else if (!igninvflag)
+ rc = GPG_ERR_INV_FLAG;
+ break;
+
+ case 10:
+ if (!memcmp (s, "igninvflag", 10))
+ igninvflag = 1;
+ else if (!memcmp (s, "no-keytest", 10))
+ flags |= PUBKEY_FLAG_NO_KEYTEST;
+ else if (!igninvflag)
+ rc = GPG_ERR_INV_FLAG;
+ break;
+
+ case 11:
+ if (!memcmp (s, "no-blinding", 11))
+ flags |= PUBKEY_FLAG_NO_BLINDING;
+ else if (!memcmp (s, "use-fips186", 11))
+ flags |= PUBKEY_FLAG_USE_FIPS186;
+ else if (!igninvflag)
+ rc = GPG_ERR_INV_FLAG;
+ break;
+
+ case 13:
+ if (!memcmp (s, "use-fips186-2", 13))
+ flags |= PUBKEY_FLAG_USE_FIPS186_2;
+ else if (!memcmp (s, "transient-key", 13))
+ flags |= PUBKEY_FLAG_TRANSIENT_KEY;
+ else if (!igninvflag)
+ rc = GPG_ERR_INV_FLAG;
+ break;
+
+ default:
+ if (!igninvflag)
+ rc = GPG_ERR_INV_FLAG;
+ break;
+ }
+ }
+
+ if (r_flags)
+ *r_flags = flags;
+ if (r_encoding)
+ *r_encoding = encoding;
+
+ return rc;
+}
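+
+/* For example (purely illustrative), the flag list
+     (flags rfc6979 no-blinding)
+   yields *R_FLAGS == (PUBKEY_FLAG_RFC6979 | PUBKEY_FLAG_NO_BLINDING)
+   and leaves *R_ENCODING at PUBKEY_ENC_UNKNOWN, whereas
+     (flags pss)
+   sets *R_ENCODING to PUBKEY_ENC_PSS and adds PUBKEY_FLAG_FIXEDLEN. */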
+
+
+static int
+get_hash_algo (const char *s, size_t n)
+{
+ static const struct { const char *name; int algo; } hashnames[] = {
+ { "sha1", GCRY_MD_SHA1 },
+ { "md5", GCRY_MD_MD5 },
+ { "sha256", GCRY_MD_SHA256 },
+ { "ripemd160", GCRY_MD_RMD160 },
+ { "rmd160", GCRY_MD_RMD160 },
+ { "sha384", GCRY_MD_SHA384 },
+ { "sha512", GCRY_MD_SHA512 },
+ { "sha224", GCRY_MD_SHA224 },
+ { "md2", GCRY_MD_MD2 },
+ { "md4", GCRY_MD_MD4 },
+ { "tiger", GCRY_MD_TIGER },
+ { "haval", GCRY_MD_HAVAL },
+ { "sha3-224", GCRY_MD_SHA3_224 },
+ { "sha3-256", GCRY_MD_SHA3_256 },
+ { "sha3-384", GCRY_MD_SHA3_384 },
+ { "sha3-512", GCRY_MD_SHA3_512 },
+ { "sm3", GCRY_MD_SM3 },
+ { "shake128", GCRY_MD_SHAKE128 },
+ { "shake256", GCRY_MD_SHAKE256 },
+ { NULL, 0 }
+ };
+ int algo;
+ int i;
+
+ for (i=0; hashnames[i].name; i++)
+ {
+ if ( strlen (hashnames[i].name) == n
+ && !memcmp (hashnames[i].name, s, n))
+ break;
+ }
+ if (hashnames[i].name)
+ algo = hashnames[i].algo;
+ else
+ {
+ /* In case of a hash algorithm that is not listed or was
+ dynamically allocated we fall back to this somewhat slower
+ method. It also allows using OIDs as
+ algorithm names. */
+ char *tmpname;
+
+ tmpname = xtrymalloc (n+1);
+ if (!tmpname)
+ algo = 0; /* Out of core - silently give up. */
+ else
+ {
+ memcpy (tmpname, s, n);
+ tmpname[n] = 0;
+ algo = _gcry_md_map_name (tmpname);
+ xfree (tmpname);
+ }
+ }
+ return algo;
+}
+
+
+/* Get the "nbits" parameter from an s-expression of the format:
+ *
+ * (algo
+ * (parameter_name_1 ....)
+ * ....
+ * (parameter_name_n ....))
+ *
+ * Example:
+ *
+ * (rsa
+ * (nbits 4:2048))
+ *
+ * On success the value for nbits is stored at R_NBITS. If no nbits
+ * parameter is found, the function returns success and stores 0 at
+ * R_NBITS. For parsing errors the function returns an error code and
+ * stores 0 at R_NBITS.
+ */
+gpg_err_code_t
+_gcry_pk_util_get_nbits (gcry_sexp_t list, unsigned int *r_nbits)
+{
+ char buf[50];
+ const char *s;
+ size_t n;
+
+ *r_nbits = 0;
+
+ list = sexp_find_token (list, "nbits", 0);
+ if (!list)
+ return 0; /* No NBITS found. */
+
+ s = sexp_nth_data (list, 1, &n);
+ if (!s || n >= DIM (buf) - 1 )
+ {
+ /* NBITS given without a value or with a value too large. */
+ sexp_release (list);
+ return GPG_ERR_INV_OBJ;
+ }
+ memcpy (buf, s, n);
+ buf[n] = 0;
+ *r_nbits = (unsigned int)strtoul (buf, NULL, 0);
+ sexp_release (list);
+ return 0;
+}
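+
+/* A minimal usage sketch (the S-expression literal is illustrative
+   only and assumes the internal sexp_build helper used elsewhere in
+   the library):
+
+     gcry_sexp_t parms;
+     unsigned int nbits;
+
+     if (!sexp_build (&parms, NULL, "(rsa (nbits 4:2048))"))
+       {
+         if (!_gcry_pk_util_get_nbits (parms, &nbits))
+           {
+             ... nbits is now 2048 ...
+           }
+         sexp_release (parms);
+       }
+ */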
+
+
+/* Get the optional "rsa-use-e" parameter from an s-expression of the
+ * format:
+ *
+ * (algo
+ * (parameter_name_1 ....)
+ * ....
+ * (parameter_name_n ....))
+ *
+ * Example:
+ *
+ * (rsa
+ * (nbits 4:2048)
+ * (rsa-use-e 2:41))
+ *
+ * On success the value for rsa-use-e is stored at R_E. If no rsa-use-e
+ * parameter is found, the function returns success and stores 65537 at
+ * R_E. For parsing errors the function returns an error code and
+ * stores 0 at R_E.
+ */
+gpg_err_code_t
+_gcry_pk_util_get_rsa_use_e (gcry_sexp_t list, unsigned long *r_e)
+{
+ char buf[50];
+ const char *s;
+ size_t n;
+
+ *r_e = 0;
+
+ list = sexp_find_token (list, "rsa-use-e", 0);
+ if (!list)
+ {
+ *r_e = 65537; /* Not given, use the value generated by old versions. */
+ return 0;
+ }
+
+ s = sexp_nth_data (list, 1, &n);
+ if (!s || n >= DIM (buf) - 1 )
+ {
+ /* No value or value too large. */
+ sexp_release (list);
+ return GPG_ERR_INV_OBJ;
+ }
+ memcpy (buf, s, n);
+ buf[n] = 0;
+ *r_e = strtoul (buf, NULL, 0);
+ sexp_release (list);
+ return 0;
+}
+
+
+/* Parse a "sig-val" s-expression and store the inner parameter list at
+ R_PARMS. ALGO_NAMES is used to verify that the algorithm in
+ "sig-val" is valid. Returns 0 on success and stores a new list at
+ R_PARMS which must be freed by the caller. On error R_PARMS is set
+ to NULL and an error code returned. If R_ECCFLAGS is not NULL flag
+ values are set into it; as of now they are only used with ecc
+ algorithms. */
+gpg_err_code_t
+_gcry_pk_util_preparse_sigval (gcry_sexp_t s_sig, const char **algo_names,
+ gcry_sexp_t *r_parms, int *r_eccflags)
+{
+ gpg_err_code_t rc;
+ gcry_sexp_t l1 = NULL;
+ gcry_sexp_t l2 = NULL;
+ char *name = NULL;
+ int i;
+
+ *r_parms = NULL;
+ if (r_eccflags)
+ *r_eccflags = 0;
+
+ /* Extract the signature value. */
+ l1 = sexp_find_token (s_sig, "sig-val", 0);
+ if (!l1)
+ {
+ rc = GPG_ERR_INV_OBJ; /* Does not contain a signature value object. */
+ goto leave;
+ }
+
+ l2 = sexp_nth (l1, 1);
+ if (!l2)
+ {
+ rc = GPG_ERR_NO_OBJ; /* No cadr for the sig object. */
+ goto leave;
+ }
+ name = sexp_nth_string (l2, 0);
+ if (!name)
+ {
+ rc = GPG_ERR_INV_OBJ; /* Invalid structure of object. */
+ goto leave;
+ }
+ else if (!strcmp (name, "flags"))
+ {
+ /* Skip a "flags" parameter and look again for the algorithm
+ name. The flags are not used here, but for the sake of
+ consistent S-expressions we need to handle them. */
+ sexp_release (l2);
+ l2 = sexp_nth (l1, 2);
+ if (!l2)
+ {
+ rc = GPG_ERR_INV_OBJ;
+ goto leave;
+ }
+ xfree (name);
+ name = sexp_nth_string (l2, 0);
+ if (!name)
+ {
+ rc = GPG_ERR_INV_OBJ; /* Invalid structure of object. */
+ goto leave;
+ }
+ }
+
+ for (i=0; algo_names[i]; i++)
+ if (!stricmp (name, algo_names[i]))
+ break;
+ if (!algo_names[i])
+ {
+ rc = GPG_ERR_CONFLICT; /* "sig-val" uses an unexpected algo. */
+ goto leave;
+ }
+ if (r_eccflags)
+ {
+ if (!strcmp (name, "eddsa"))
+ *r_eccflags = PUBKEY_FLAG_EDDSA;
+ if (!strcmp (name, "gost"))
+ *r_eccflags = PUBKEY_FLAG_GOST;
+ if (!strcmp (name, "sm2"))
+ *r_eccflags = PUBKEY_FLAG_SM2;
+ }
+
+ *r_parms = l2;
+ l2 = NULL;
+ rc = 0;
+
+ leave:
+ xfree (name);
+ sexp_release (l2);
+ sexp_release (l1);
+ return rc;
+}
+
+
+/* Parse a "enc-val" s-expression and store the inner parameter list
+ at R_PARMS. ALGO_NAMES is used to verify that the algorithm in
+ "enc-val" is valid. Returns 0 on success and stores a new list at
+ R_PARMS which must be freed by the caller. On error R_PARMS is set
+ to NULL and an error code returned. If R_ECCFLAGS is not NULL flag
+ values are set into it; as of now they are only used with ecc
+ algorithms.
+
+ (enc-val
+ [(flags [raw, pkcs1, oaep, no-blinding])]
+ [(hash-algo <algo>)]
+ [(label <label>)]
+ (<algo>
+ (<param_name1> <mpi>)
+ ...
+ (<param_namen> <mpi>)))
+
+ HASH-ALGO and LABEL are specific to OAEP. CTX will be updated with
+ encoding information. */
+gpg_err_code_t
+_gcry_pk_util_preparse_encval (gcry_sexp_t sexp, const char **algo_names,
+ gcry_sexp_t *r_parms,
+ struct pk_encoding_ctx *ctx)
+{
+ gcry_err_code_t rc = 0;
+ gcry_sexp_t l1 = NULL;
+ gcry_sexp_t l2 = NULL;
+ char *name = NULL;
+ size_t n;
+ int parsed_flags = 0;
+ int i;
+
+ *r_parms = NULL;
+
+ /* Check that the first element is valid. */
+ l1 = sexp_find_token (sexp, "enc-val" , 0);
+ if (!l1)
+ {
+ rc = GPG_ERR_INV_OBJ; /* Does not contain an encrypted value object. */
+ goto leave;
+ }
+
+ l2 = sexp_nth (l1, 1);
+ if (!l2)
+ {
+ rc = GPG_ERR_NO_OBJ; /* No cadr for the data object. */
+ goto leave;
+ }
+
+ /* Extract identifier of sublist. */
+ name = sexp_nth_string (l2, 0);
+ if (!name)
+ {
+ rc = GPG_ERR_INV_OBJ; /* Invalid structure of object. */
+ goto leave;
+ }
+
+ if (!strcmp (name, "flags"))
+ {
+ const char *s;
+
+ /* There is a flags element - process it. */
+ rc = _gcry_pk_util_parse_flaglist (l2, &parsed_flags, &ctx->encoding);
+ if (rc)
+ goto leave;
+ if (ctx->encoding == PUBKEY_ENC_PSS)
+ {
+ rc = GPG_ERR_CONFLICT;
+ goto leave;
+ }
+
+ /* Get the OAEP parameters HASH-ALGO and LABEL, if any. */
+ if (ctx->encoding == PUBKEY_ENC_OAEP)
+ {
+ /* Get HASH-ALGO. */
+ sexp_release (l2);
+ l2 = sexp_find_token (l1, "hash-algo", 0);
+ if (l2)
+ {
+ s = sexp_nth_data (l2, 1, &n);
+ if (!s)
+ rc = GPG_ERR_NO_OBJ;
+ else
+ {
+ ctx->hash_algo = get_hash_algo (s, n);
+ if (!ctx->hash_algo)
+ rc = GPG_ERR_DIGEST_ALGO;
+ }
+ if (rc)
+ goto leave;
+ }
+
+ /* Get LABEL. */
+ sexp_release (l2);
+ l2 = sexp_find_token (l1, "label", 0);
+ if (l2)
+ {
+ s = sexp_nth_data (l2, 1, &n);
+ if (!s)
+ rc = GPG_ERR_NO_OBJ;
+ else if (n > 0)
+ {
+ ctx->label = xtrymalloc (n);
+ if (!ctx->label)
+ rc = gpg_err_code_from_syserror ();
+ else
+ {
+ memcpy (ctx->label, s, n);
+ ctx->labellen = n;
+ }
+ }
+ if (rc)
+ goto leave;
+ }
+ }
+
+ /* Get the next which has the actual data - skip HASH-ALGO and LABEL. */
+ for (i = 2; (sexp_release (l2), l2 = sexp_nth (l1, i)); i++)
+ {
+ s = sexp_nth_data (l2, 0, &n);
+ if (!(n == 9 && !memcmp (s, "hash-algo", 9))
+ && !(n == 5 && !memcmp (s, "label", 5))
+ && !(n == 15 && !memcmp (s, "random-override", 15)))
+ break;
+ }
+ if (!l2)
+ {
+ rc = GPG_ERR_NO_OBJ; /* No cadr for the data object. */
+ goto leave;
+ }
+
+ /* Extract sublist identifier. */
+ xfree (name);
+ name = sexp_nth_string (l2, 0);
+ if (!name)
+ {
+ rc = GPG_ERR_INV_OBJ; /* Invalid structure of object. */
+ goto leave;
+ }
+ }
+ else /* No flags - flag as legacy structure. */
+ parsed_flags |= PUBKEY_FLAG_LEGACYRESULT;
+
+ for (i=0; algo_names[i]; i++)
+ if (!stricmp (name, algo_names[i]))
+ break;
+ if (!algo_names[i])
+ {
+ rc = GPG_ERR_CONFLICT; /* "enc-val" uses an unexpected algo. */
+ goto leave;
+ }
+
+ *r_parms = l2;
+ l2 = NULL;
+ ctx->flags |= parsed_flags;
+ rc = 0;
+
+ leave:
+ xfree (name);
+ sexp_release (l2);
+ sexp_release (l1);
+ return rc;
+}
+
+
+/* Initialize an encoding context. */
+void
+_gcry_pk_util_init_encoding_ctx (struct pk_encoding_ctx *ctx,
+ enum pk_operation op,
+ unsigned int nbits)
+{
+ ctx->op = op;
+ ctx->nbits = nbits;
+ ctx->encoding = PUBKEY_ENC_UNKNOWN;
+ ctx->flags = 0;
+ if (fips_mode ())
+ {
+ ctx->hash_algo = GCRY_MD_SHA256;
+ }
+ else
+ {
+ ctx->hash_algo = GCRY_MD_SHA1;
+ }
+ ctx->label = NULL;
+ ctx->labellen = 0;
+ ctx->saltlen = 20;
+ ctx->verify_cmp = NULL;
+ ctx->verify_arg = NULL;
+}
+
+/* Free a context initialized by _gcry_pk_util_init_encoding_ctx. */
+void
+_gcry_pk_util_free_encoding_ctx (struct pk_encoding_ctx *ctx)
+{
+ xfree (ctx->label);
+}
+
+
+/* Take the hash value and convert it into an MPI, suitable for
+ passing to the low level functions. We currently support the
+ old style way of passing just an MPI and the modern interface which
+ allows passing flags so that we can choose between raw and pkcs1
+ padding - more padding options may be added later.
+
+ (<mpi>)
+ or
+ (data
+ [(flags [raw, direct, pkcs1, oaep, pss,
+ no-blinding, rfc6979, eddsa, prehash])]
+ [(hash <algo> <value>)]
+ [(value <text>)]
+ [(hash-algo <algo>)]
+ [(label <label>)]
+ [(salt-length <length>)]
+ [(random-override <data>)]
+ )
+
+ Either the VALUE or the HASH element must be present for use
+ with signatures. VALUE is used for encryption.
+
+ HASH-ALGO is specific to OAEP and EDDSA.
+
+ LABEL is specific to OAEP.
+
+ SALT-LENGTH is for PSS; it is limited to 16384 bytes.
+
+ RANDOM-OVERRIDE is used to replace random nonces for regression
+ testing. */
+gcry_err_code_t
+_gcry_pk_util_data_to_mpi (gcry_sexp_t input, gcry_mpi_t *ret_mpi,
+ struct pk_encoding_ctx *ctx)
+{
+ gcry_err_code_t rc = 0;
+ gcry_sexp_t ldata, lhash, lvalue;
+ size_t n;
+ const char *s;
+ int unknown_flag = 0;
+ int parsed_flags = 0;
+
+ *ret_mpi = NULL;
+ ldata = sexp_find_token (input, "data", 0);
+ if (!ldata)
+ { /* assume old style */
+ int mpifmt = (ctx->flags & PUBKEY_FLAG_RAW_FLAG) ?
+ GCRYMPI_FMT_OPAQUE : GCRYMPI_FMT_STD;
+
+ *ret_mpi = sexp_nth_mpi (input, 0, mpifmt);
+ return *ret_mpi ? GPG_ERR_NO_ERROR : GPG_ERR_INV_OBJ;
+ }
+
+ /* See whether there is a flags list. */
+ {
+ gcry_sexp_t lflags = sexp_find_token (ldata, "flags", 0);
+ if (lflags)
+ {
+ if (_gcry_pk_util_parse_flaglist (lflags,
+ &parsed_flags, &ctx->encoding))
+ unknown_flag = 1;
+ sexp_release (lflags);
+ }
+ }
+
+ if (ctx->encoding == PUBKEY_ENC_UNKNOWN)
+ ctx->encoding = PUBKEY_ENC_RAW; /* default to raw */
+
+ /* Get HASH or MPI */
+ lhash = sexp_find_token (ldata, "hash", 0);
+ lvalue = lhash? NULL : sexp_find_token (ldata, "value", 0);
+
+ if (!(!lhash ^ !lvalue))
+ rc = GPG_ERR_INV_OBJ; /* none or both given */
+ else if (unknown_flag)
+ rc = GPG_ERR_INV_FLAG;
+ else if (ctx->encoding == PUBKEY_ENC_RAW
+ && ((parsed_flags & PUBKEY_FLAG_EDDSA)
+ || (ctx->flags & PUBKEY_FLAG_EDDSA)))
+ {
+ /* Prepare for EdDSA. */
+ gcry_sexp_t list;
+ void *value;
+ size_t valuelen;
+
+ if (!lvalue)
+ {
+ rc = GPG_ERR_INV_OBJ;
+ goto leave;
+ }
+ /* Hash algo is determined by curve. No hash-algo is OK. */
+ /* Get HASH-ALGO. */
+ list = sexp_find_token (ldata, "hash-algo", 0);
+ if (list)
+ {
+ s = sexp_nth_data (list, 1, &n);
+ if (!s)
+ rc = GPG_ERR_NO_OBJ;
+ else
+ {
+ ctx->hash_algo = get_hash_algo (s, n);
+ if (!ctx->hash_algo)
+ rc = GPG_ERR_DIGEST_ALGO;
+ }
+ sexp_release (list);
+ }
+ if (rc)
+ goto leave;
+
+ /* Get LABEL. */
+ list = sexp_find_token (ldata, "label", 0);
+ if (list)
+ {
+ s = sexp_nth_data (list, 1, &n);
+ if (!s)
+ rc = GPG_ERR_NO_OBJ;
+ else if (n > 0)
+ {
+ ctx->label = xtrymalloc (n);
+ if (!ctx->label)
+ rc = gpg_err_code_from_syserror ();
+ else
+ {
+ memcpy (ctx->label, s, n);
+ ctx->labellen = n;
+ }
+ }
+ sexp_release (list);
+ if (rc)
+ goto leave;
+ }
+
+ /* Get VALUE. */
+ value = sexp_nth_buffer (lvalue, 1, &valuelen);
+ if (!value)
+ {
+ /* We assume that a zero length message is meant by
+ "(value)". This is commonly used by test vectors. Note
+ that S-expressions do not allow zero length items. */
+ valuelen = 0;
+ value = xtrymalloc (1);
+ if (!value)
+ rc = gpg_err_code_from_syserror ();
+ }
+ else if ((valuelen * 8) < valuelen)
+ {
+ xfree (value);
+ rc = GPG_ERR_TOO_LARGE;
+ }
+ if (rc)
+ goto leave;
+
+ /* Note that mpi_set_opaque takes ownership of VALUE. */
+ *ret_mpi = mpi_set_opaque (NULL, value, valuelen*8);
+ }
+ else if (ctx->encoding == PUBKEY_ENC_RAW && lhash
+ && ((parsed_flags & PUBKEY_FLAG_RAW_FLAG)
+ || (parsed_flags & PUBKEY_FLAG_RFC6979)))
+ {
+ /* Raw encoding along with a hash element. This is commonly
+ used for DSA. For better backward error compatibility we
+ allow this only if either the rfc6979 flag has been given or
+ the raw flag was explicitly given. */
+ if (sexp_length (lhash) != 3)
+ rc = GPG_ERR_INV_OBJ;
+ else if ( !(s=sexp_nth_data (lhash, 1, &n)) || !n )
+ rc = GPG_ERR_INV_OBJ;
+ else
+ {
+ void *value;
+ size_t valuelen;
+
+ ctx->hash_algo = get_hash_algo (s, n);
+ if (!ctx->hash_algo)
+ rc = GPG_ERR_DIGEST_ALGO;
+ else if (!(value=sexp_nth_buffer (lhash, 2, &valuelen)))
+ rc = GPG_ERR_INV_OBJ;
+ else if ((valuelen * 8) < valuelen)
+ {
+ xfree (value);
+ rc = GPG_ERR_TOO_LARGE;
+ }
+ else
+ *ret_mpi = mpi_set_opaque (NULL, value, valuelen*8);
+ }
+ }
+ else if (ctx->encoding == PUBKEY_ENC_RAW && lvalue)
+ {
+ /* RFC6979 may only be used with a hash value and not the
+ MPI based value. */
+ if (parsed_flags & PUBKEY_FLAG_RFC6979)
+ {
+ rc = GPG_ERR_CONFLICT;
+ goto leave;
+ }
+
+ /* Get the value */
+ *ret_mpi = sexp_nth_mpi (lvalue, 1, GCRYMPI_FMT_USG);
+ if (!*ret_mpi)
+ rc = GPG_ERR_INV_OBJ;
+ }
+ else if (ctx->encoding == PUBKEY_ENC_PKCS1 && lvalue
+ && ctx->op == PUBKEY_OP_ENCRYPT)
+ {
+ const void * value;
+ size_t valuelen;
+ gcry_sexp_t list;
+ void *random_override = NULL;
+ size_t random_override_len = 0;
+
+ if ( !(value=sexp_nth_data (lvalue, 1, &valuelen)) || !valuelen )
+ rc = GPG_ERR_INV_OBJ;
+ else
+ {
+ /* Get optional RANDOM-OVERRIDE. */
+ list = sexp_find_token (ldata, "random-override", 0);
+ if (list)
+ {
+ s = sexp_nth_data (list, 1, &n);
+ if (!s)
+ rc = GPG_ERR_NO_OBJ;
+ else if (n > 0)
+ {
+ random_override = xtrymalloc (n);
+ if (!random_override)
+ rc = gpg_err_code_from_syserror ();
+ else
+ {
+ memcpy (random_override, s, n);
+ random_override_len = n;
+ }
+ }
+ sexp_release (list);
+ if (rc)
+ goto leave;
+ }
+
+ rc = _gcry_rsa_pkcs1_encode_for_enc (ret_mpi, ctx->nbits,
+ value, valuelen,
+ random_override,
+ random_override_len);
+ xfree (random_override);
+ }
+ }
+ else if (ctx->encoding == PUBKEY_ENC_PKCS1 && lhash
+ && (ctx->op == PUBKEY_OP_SIGN || ctx->op == PUBKEY_OP_VERIFY))
+ {
+ if (sexp_length (lhash) != 3)
+ rc = GPG_ERR_INV_OBJ;
+ else if ( !(s=sexp_nth_data (lhash, 1, &n)) || !n )
+ rc = GPG_ERR_INV_OBJ;
+ else
+ {
+ const void * value;
+ size_t valuelen;
+
+ ctx->hash_algo = get_hash_algo (s, n);
+
+ if (!ctx->hash_algo)
+ rc = GPG_ERR_DIGEST_ALGO;
+ else if ( !(value=sexp_nth_data (lhash, 2, &valuelen))
+ || !valuelen )
+ rc = GPG_ERR_INV_OBJ;
+ else
+ rc = _gcry_rsa_pkcs1_encode_for_sig (ret_mpi, ctx->nbits,
+ value, valuelen,
+ ctx->hash_algo);
+ }
+ }
+ else if (ctx->encoding == PUBKEY_ENC_PKCS1_RAW && lvalue
+ && (ctx->op == PUBKEY_OP_SIGN || ctx->op == PUBKEY_OP_VERIFY))
+ {
+ const void * value;
+ size_t valuelen;
+
+ if (sexp_length (lvalue) != 2)
+ rc = GPG_ERR_INV_OBJ;
+ else if ( !(value=sexp_nth_data (lvalue, 1, &valuelen))
+ || !valuelen )
+ rc = GPG_ERR_INV_OBJ;
+ else
+ rc = _gcry_rsa_pkcs1_encode_raw_for_sig (ret_mpi, ctx->nbits,
+ value, valuelen);
+ }
+ else if (ctx->encoding == PUBKEY_ENC_OAEP && lvalue
+ && ctx->op == PUBKEY_OP_ENCRYPT)
+ {
+ const void * value;
+ size_t valuelen;
+
+ if ( !(value=sexp_nth_data (lvalue, 1, &valuelen)) || !valuelen )
+ rc = GPG_ERR_INV_OBJ;
+ else
+ {
+ gcry_sexp_t list;
+ void *random_override = NULL;
+ size_t random_override_len = 0;
+
+ /* Get HASH-ALGO. */
+ list = sexp_find_token (ldata, "hash-algo", 0);
+ if (list)
+ {
+ s = sexp_nth_data (list, 1, &n);
+ if (!s)
+ rc = GPG_ERR_NO_OBJ;
+ else
+ {
+ ctx->hash_algo = get_hash_algo (s, n);
+ if (!ctx->hash_algo)
+ rc = GPG_ERR_DIGEST_ALGO;
+ }
+ sexp_release (list);
+ if (rc)
+ goto leave;
+ }
+
+ /* Get LABEL. */
+ list = sexp_find_token (ldata, "label", 0);
+ if (list)
+ {
+ s = sexp_nth_data (list, 1, &n);
+ if (!s)
+ rc = GPG_ERR_NO_OBJ;
+ else if (n > 0)
+ {
+ ctx->label = xtrymalloc (n);
+ if (!ctx->label)
+ rc = gpg_err_code_from_syserror ();
+ else
+ {
+ memcpy (ctx->label, s, n);
+ ctx->labellen = n;
+ }
+ }
+ sexp_release (list);
+ if (rc)
+ goto leave;
+ }
+ /* Get optional RANDOM-OVERRIDE. */
+ list = sexp_find_token (ldata, "random-override", 0);
+ if (list)
+ {
+ s = sexp_nth_data (list, 1, &n);
+ if (!s)
+ rc = GPG_ERR_NO_OBJ;
+ else if (n > 0)
+ {
+ random_override = xtrymalloc (n);
+ if (!random_override)
+ rc = gpg_err_code_from_syserror ();
+ else
+ {
+ memcpy (random_override, s, n);
+ random_override_len = n;
+ }
+ }
+ sexp_release (list);
+ if (rc)
+ goto leave;
+ }
+
+ rc = _gcry_rsa_oaep_encode (ret_mpi, ctx->nbits, ctx->hash_algo,
+ value, valuelen,
+ ctx->label, ctx->labellen,
+ random_override, random_override_len);
+
+ xfree (random_override);
+ }
+ }
+ else if (ctx->encoding == PUBKEY_ENC_PSS && lhash
+ && ctx->op == PUBKEY_OP_SIGN)
+ {
+ if (sexp_length (lhash) != 3)
+ rc = GPG_ERR_INV_OBJ;
+ else if ( !(s=sexp_nth_data (lhash, 1, &n)) || !n )
+ rc = GPG_ERR_INV_OBJ;
+ else
+ {
+ const void * value;
+ size_t valuelen;
+ void *random_override = NULL;
+ size_t random_override_len = 0;
+
+ ctx->hash_algo = get_hash_algo (s, n);
+
+ if (!ctx->hash_algo)
+ rc = GPG_ERR_DIGEST_ALGO;
+ else if ( !(value=sexp_nth_data (lhash, 2, &valuelen))
+ || !valuelen )
+ rc = GPG_ERR_INV_OBJ;
+ else
+ {
+ gcry_sexp_t list;
+
+ /* Get SALT-LENGTH. */
+ list = sexp_find_token (ldata, "salt-length", 0);
+ if (list)
+ {
+ s = sexp_nth_data (list, 1, &n);
+ if (!s)
+ {
+ rc = GPG_ERR_NO_OBJ;
+ goto leave;
+ }
+ ctx->saltlen = (unsigned int)strtoul (s, NULL, 10);
+ sexp_release (list);
+ }
+
+ /* Get optional RANDOM-OVERRIDE. */
+ list = sexp_find_token (ldata, "random-override", 0);
+ if (list)
+ {
+ s = sexp_nth_data (list, 1, &n);
+ if (!s)
+ rc = GPG_ERR_NO_OBJ;
+ else if (n > 0)
+ {
+ random_override = xtrymalloc (n);
+ if (!random_override)
+ rc = gpg_err_code_from_syserror ();
+ else
+ {
+ memcpy (random_override, s, n);
+ random_override_len = n;
+ }
+ }
+ sexp_release (list);
+ if (rc)
+ goto leave;
+ }
+
+ /* Encode the data. (NBITS-1 is due to 8.1.1, step 1.) */
+ rc = _gcry_rsa_pss_encode (ret_mpi, ctx->nbits - 1,
+ ctx->hash_algo,
+ value, valuelen, ctx->saltlen,
+ random_override, random_override_len);
+
+ xfree (random_override);
+ }
+ }
+ }
+ else if (ctx->encoding == PUBKEY_ENC_PSS && lhash
+ && ctx->op == PUBKEY_OP_VERIFY)
+ {
+ if (sexp_length (lhash) != 3)
+ rc = GPG_ERR_INV_OBJ;
+ else if ( !(s=sexp_nth_data (lhash, 1, &n)) || !n )
+ rc = GPG_ERR_INV_OBJ;
+ else
+ {
+ ctx->hash_algo = get_hash_algo (s, n);
+
+ if (!ctx->hash_algo)
+ rc = GPG_ERR_DIGEST_ALGO;
+ else
+ {
+ gcry_sexp_t list;
+ /* Get SALT-LENGTH. */
+ list = sexp_find_token (ldata, "salt-length", 0);
+ if (list)
+ {
+ unsigned long ul;
+
+ s = sexp_nth_data (list, 1, &n);
+ if (!s)
+ {
+ rc = GPG_ERR_NO_OBJ;
+ sexp_release (list);
+ goto leave;
+ }
+ ul = strtoul (s, NULL, 10);
+ if (ul > 16384)
+ {
+ rc = GPG_ERR_TOO_LARGE;
+ sexp_release (list);
+ goto leave;
+ }
+ ctx->saltlen = ul;
+ sexp_release (list);
+ }
+
+ *ret_mpi = sexp_nth_mpi (lhash, 2, GCRYMPI_FMT_USG);
+ if (!*ret_mpi)
+ rc = GPG_ERR_INV_OBJ;
+ ctx->verify_cmp = pss_verify_cmp;
+ ctx->verify_arg = *ret_mpi;
+ }
+ }
+ }
+ else
+ rc = GPG_ERR_CONFLICT;
+
+ leave:
+ sexp_release (ldata);
+ sexp_release (lhash);
+ sexp_release (lvalue);
+
+ if (!rc)
+ ctx->flags |= parsed_flags;
+ else
+ {
+ xfree (ctx->label);
+ ctx->label = NULL;
+ }
+
+ return rc;
+}
diff --git a/comm/third_party/libgcrypt/cipher/pubkey.c b/comm/third_party/libgcrypt/cipher/pubkey.c
new file mode 100644
index 0000000000..4c07e33bfc
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/pubkey.c
@@ -0,0 +1,970 @@
+/* pubkey.c - pubkey dispatcher
+ * Copyright (C) 1998, 1999, 2000, 2002, 2003, 2005,
+ * 2007, 2008, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "pubkey-internal.h"
+
+
+/* This is the list of the public-key algorithms included in
+ Libgcrypt. */
+static gcry_pk_spec_t * const pubkey_list[] =
+ {
+#if USE_ECC
+ &_gcry_pubkey_spec_ecc,
+#endif
+#if USE_RSA
+ &_gcry_pubkey_spec_rsa,
+#endif
+#if USE_DSA
+ &_gcry_pubkey_spec_dsa,
+#endif
+#if USE_ELGAMAL
+ &_gcry_pubkey_spec_elg,
+#endif
+ NULL
+ };
+
+
+static int
+map_algo (int algo)
+{
+ switch (algo)
+ {
+ case GCRY_PK_RSA_E: return GCRY_PK_RSA;
+ case GCRY_PK_RSA_S: return GCRY_PK_RSA;
+ case GCRY_PK_ELG_E: return GCRY_PK_ELG;
+ case GCRY_PK_ECDSA: return GCRY_PK_ECC;
+ case GCRY_PK_ECDH: return GCRY_PK_ECC;
+ default: return algo;
+ }
+}
+
+
+/* Return the spec structure for the public key algorithm ALGO. For
+ an unknown algorithm NULL is returned. */
+static gcry_pk_spec_t *
+spec_from_algo (int algo)
+{
+ int idx;
+ gcry_pk_spec_t *spec;
+
+ algo = map_algo (algo);
+
+ for (idx = 0; (spec = pubkey_list[idx]); idx++)
+ if (algo == spec->algo)
+ return spec;
+ return NULL;
+}
+
+
+/* Return the spec structure for the public key algorithm with NAME.
+ For an unknown name NULL is returned. */
+static gcry_pk_spec_t *
+spec_from_name (const char *name)
+{
+ gcry_pk_spec_t *spec;
+ int idx;
+ const char **aliases;
+
+ for (idx=0; (spec = pubkey_list[idx]); idx++)
+ {
+ if (!stricmp (name, spec->name))
+ return spec;
+ for (aliases = spec->aliases; *aliases; aliases++)
+ if (!stricmp (name, *aliases))
+ return spec;
+ }
+
+ return NULL;
+}
+
+
+
+/* Given the s-expression SEXP with the first element being either
+ * "private-key" or "public-key" return the spec structure for it. We
+ * look through the list to find a list beginning with "private-key"
+ * or "public-key" - the first one found is used. If WANT_PRIVATE is
+ * set the function will only succeed if a private key has been given.
+ * On success the spec is stored at R_SPEC. On error NULL is stored
+ * at R_SPEC and an error code returned. If R_PARMS is not NULL and
+ * the function returns success, the parameter list below
+ * "private-key" or "public-key" is stored there and the caller must
+ * call gcry_sexp_release on it.
+ */
+static gcry_err_code_t
+spec_from_sexp (gcry_sexp_t sexp, int want_private,
+ gcry_pk_spec_t **r_spec, gcry_sexp_t *r_parms)
+{
+ gcry_sexp_t list, l2;
+ char *name;
+ gcry_pk_spec_t *spec;
+
+ *r_spec = NULL;
+ if (r_parms)
+ *r_parms = NULL;
+
+ /* Check that the first element is valid. If we are looking for a
+ public key but a private key was supplied, we allow the use of
+ the private key anyway. The rationale for this is that the
+ private key is a superset of the public key. */
+ list = sexp_find_token (sexp, want_private? "private-key":"public-key", 0);
+ if (!list && !want_private)
+ list = sexp_find_token (sexp, "private-key", 0);
+ if (!list)
+ return GPG_ERR_INV_OBJ; /* Does not contain a key object. */
+
+ l2 = sexp_cadr (list);
+ sexp_release (list);
+ list = l2;
+ name = sexp_nth_string (list, 0);
+ if (!name)
+ {
+ sexp_release ( list );
+ return GPG_ERR_INV_OBJ; /* Invalid structure of object. */
+ }
+ spec = spec_from_name (name);
+ xfree (name);
+ if (!spec)
+ {
+ sexp_release (list);
+ return GPG_ERR_PUBKEY_ALGO; /* Unknown algorithm. */
+ }
+ *r_spec = spec;
+ if (r_parms)
+ *r_parms = list;
+ else
+ sexp_release (list);
+ return 0;
+}
+
+
+
+/* Disable the use of the algorithm ALGO. This is not thread safe and
+ should thus be called early. */
+static void
+disable_pubkey_algo (int algo)
+{
+ gcry_pk_spec_t *spec = spec_from_algo (algo);
+
+ if (spec)
+ spec->flags.disabled = 1;
+}
+
+
+
+/*
+ * Map a string to the pubkey algo
+ */
+int
+_gcry_pk_map_name (const char *string)
+{
+ gcry_pk_spec_t *spec;
+
+ if (!string)
+ return 0;
+ spec = spec_from_name (string);
+ if (!spec)
+ return 0;
+ if (spec->flags.disabled)
+ return 0;
+ return spec->algo;
+}
+
+
+/* Map the public key algorithm whose ID is contained in ALGORITHM to
+ a string representation of the algorithm name. For unknown
+   algorithm IDs this function returns "?". */
+const char *
+_gcry_pk_algo_name (int algo)
+{
+ gcry_pk_spec_t *spec;
+
+ spec = spec_from_algo (algo);
+ if (spec)
+ return spec->name;
+ return "?";
+}
+
+
+/****************
+ * A USE of 0 means: don't care.
+ */
+static gcry_err_code_t
+check_pubkey_algo (int algo, unsigned use)
+{
+ gcry_err_code_t err = 0;
+ gcry_pk_spec_t *spec;
+
+ spec = spec_from_algo (algo);
+ if (spec)
+ {
+ if (((use & GCRY_PK_USAGE_SIGN)
+ && (! (spec->use & GCRY_PK_USAGE_SIGN)))
+ || ((use & GCRY_PK_USAGE_ENCR)
+ && (! (spec->use & GCRY_PK_USAGE_ENCR))))
+ err = GPG_ERR_WRONG_PUBKEY_ALGO;
+ }
+ else
+ err = GPG_ERR_PUBKEY_ALGO;
+
+ return err;
+}
+
+
+/****************
+ * Return the number of public key material numbers
+ */
+static int
+pubkey_get_npkey (int algo)
+{
+ gcry_pk_spec_t *spec = spec_from_algo (algo);
+
+ return spec? strlen (spec->elements_pkey) : 0;
+}
+
+
+/****************
+ * Return the number of secret key material numbers
+ */
+static int
+pubkey_get_nskey (int algo)
+{
+ gcry_pk_spec_t *spec = spec_from_algo (algo);
+
+ return spec? strlen (spec->elements_skey) : 0;
+}
+
+
+/****************
+ * Return the number of signature material numbers
+ */
+static int
+pubkey_get_nsig (int algo)
+{
+ gcry_pk_spec_t *spec = spec_from_algo (algo);
+
+ return spec? strlen (spec->elements_sig) : 0;
+}
+
+/****************
+ * Return the number of encryption material numbers
+ */
+static int
+pubkey_get_nenc (int algo)
+{
+ gcry_pk_spec_t *spec = spec_from_algo (algo);
+
+ return spec? strlen (spec->elements_enc) : 0;
+}
+
+
+
+/*
+ Do a PK encrypt operation
+
+ Caller has to provide a public key as the SEXP pkey and data as a
+ SEXP with just one MPI in it. Alternatively S_DATA might be a
+ complex S-Expression, similar to the one used for signature
+  verification.  This provides a flag which allows handling of PKCS#1
+  block type 2 padding.  The function returns a sexp which may be
+  passed to pk_decrypt.
+
+  Returns: 0 or an error code.
+
+ s_data = See comment for _gcry_pk_util_data_to_mpi
+ s_pkey = <key-as-defined-in-sexp_to_key>
+ r_ciph = (enc-val
+ (<algo>
+ (<param_name1> <mpi>)
+ ...
+ (<param_namen> <mpi>)
+ ))
+
+*/
+gcry_err_code_t
+_gcry_pk_encrypt (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t s_pkey)
+{
+ gcry_err_code_t rc;
+ gcry_pk_spec_t *spec;
+ gcry_sexp_t keyparms;
+
+ *r_ciph = NULL;
+
+ rc = spec_from_sexp (s_pkey, 0, &spec, &keyparms);
+ if (rc)
+ goto leave;
+
+ if (spec->encrypt)
+ rc = spec->encrypt (r_ciph, s_data, keyparms);
+ else
+ rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+ sexp_release (keyparms);
+ return rc;
+}
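+
+/* A minimal usage sketch, assuming an application links against the
+   public API from <gcrypt.h>, that PK is a (public-key ...) sexp and K
+   an existing MPI; error handling is omitted.  The resulting CIPH is
+   the (enc-val ...) S-expression described above.
+
+     gcry_sexp_t data, ciph;
+     gcry_sexp_build (&data, NULL, "(data (flags raw) (value %m))", k);
+     gcry_pk_encrypt (&ciph, data, pk);
+     gcry_sexp_release (data);
+*/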
+
+
+/*
+ Do a PK decrypt operation
+
+ Caller has to provide a secret key as the SEXP skey and data in a
+ format as created by gcry_pk_encrypt. For historic reasons the
+ function returns simply an MPI as an S-expression part; this is
+ deprecated and the new method should be used which returns a real
+   S-expression; this is selected by adding at least an empty flags
+ list to S_DATA.
+
+   Returns: 0 or an error code.
+
+ s_data = (enc-val
+ [(flags [raw, pkcs1, oaep])]
+ (<algo>
+ (<param_name1> <mpi>)
+ ...
+ (<param_namen> <mpi>)
+ ))
+ s_skey = <key-as-defined-in-sexp_to_key>
+ r_plain= Either an incomplete S-expression without the parentheses
+ or if the flags list is used (even if empty) a real S-expression:
+ (value PLAIN). In raw mode (or no flags given) the returned value
+ is to be interpreted as a signed MPI, thus it may have an extra
+ leading zero octet even if not included in the original data.
+ With pkcs1 or oaep decoding enabled the returned value is a
+ verbatim octet string.
+ */
+gcry_err_code_t
+_gcry_pk_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t s_skey)
+{
+ gcry_err_code_t rc;
+ gcry_pk_spec_t *spec;
+ gcry_sexp_t keyparms;
+
+ *r_plain = NULL;
+
+ rc = spec_from_sexp (s_skey, 1, &spec, &keyparms);
+ if (rc)
+ goto leave;
+
+ if (spec->decrypt)
+ rc = spec->decrypt (r_plain, s_data, keyparms);
+ else
+ rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+ sexp_release (keyparms);
+ return rc;
+}
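+
+/* A minimal sketch for the decrypt direction, assuming CIPH is the
+   (enc-val ...) sexp from the encrypt sketch above and SK the matching
+   private-key sexp; because the original data carried a flags list the
+   result comes back as a real (value PLAIN) S-expression:
+
+     gcry_sexp_t plain, l;
+     gcry_mpi_t k;
+     gcry_pk_decrypt (&plain, ciph, sk);
+     l = gcry_sexp_find_token (plain, "value", 0);
+     k = gcry_sexp_nth_mpi (l, 1, GCRYMPI_FMT_USG);
+     gcry_sexp_release (l);
+     gcry_sexp_release (plain);
+*/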
+
+
+
+/*
+ Create a signature.
+
+ Caller has to provide a secret key as the SEXP skey and data
+ expressed as a SEXP list hash with only one element which should
+  instantly be available as an MPI.  Alternatively the structure given
+  below may be used for S_HASH; it provides the ability to pass flags
+  to the operation; the flags currently defined are "pkcs1" which does
+  PKCS#1 block type 1 style padding and "pss" for PSS encoding.
+
+  Returns: 0 or an error code.
+ In case of 0 the function returns a new SEXP with the
+ signature value; the structure of this signature depends on the
+ other arguments but is always suitable to be passed to
+ gcry_pk_verify
+
+  s_hash = See comment for _gcry_pk_util_data_to_mpi
+
+ s_skey = <key-as-defined-in-sexp_to_key>
+ r_sig = (sig-val
+ (<algo>
+ (<param_name1> <mpi>)
+ ...
+ (<param_namen> <mpi>))
+ [(hash algo)])
+
+ Note that (hash algo) in R_SIG is not used.
+*/
+gcry_err_code_t
+_gcry_pk_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_hash, gcry_sexp_t s_skey)
+{
+ gcry_err_code_t rc;
+ gcry_pk_spec_t *spec;
+ gcry_sexp_t keyparms;
+
+ *r_sig = NULL;
+
+ rc = spec_from_sexp (s_skey, 1, &spec, &keyparms);
+ if (rc)
+ goto leave;
+
+ if (spec->sign)
+ rc = spec->sign (r_sig, s_hash, keyparms);
+ else
+ rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+ sexp_release (keyparms);
+ return rc;
+}
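+
+/* A minimal signing sketch, assuming SK is an RSA private-key sexp and
+   MD holds a 32 byte SHA-256 digest; error handling omitted.  The
+   returned SIG has the (sig-val (rsa (s ...))) shape:
+
+     gcry_sexp_t hash, sig;
+     gcry_sexp_build (&hash, NULL,
+                      "(data (flags pkcs1) (hash sha256 %b))", 32, md);
+     gcry_pk_sign (&sig, hash, sk);
+*/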
+
+
+/*
+ Verify a signature.
+
+  Caller has to supply the public key pkey, the signature sig and the
+  hash value data.  The public key has to be a standard public key given
+  as an S-Exp, sig is an S-Exp as returned from gcry_pk_sign and data
+  must be an S-Exp like the one used in sign too. */
+gcry_err_code_t
+_gcry_pk_verify (gcry_sexp_t s_sig, gcry_sexp_t s_hash, gcry_sexp_t s_pkey)
+{
+ gcry_err_code_t rc;
+ gcry_pk_spec_t *spec;
+ gcry_sexp_t keyparms;
+
+ rc = spec_from_sexp (s_pkey, 0, &spec, &keyparms);
+ if (rc)
+ goto leave;
+
+ if (spec->verify)
+ rc = spec->verify (s_sig, s_hash, keyparms);
+ else
+ rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+ sexp_release (keyparms);
+ return rc;
+}
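+
+/* Continuing the sketch above, verification takes the same hash sexp
+   and the signature together with the public key PK; a return value of
+   0 means the signature is good:
+
+     if (gcry_pk_verify (sig, hash, pk))
+       fputs ("bad signature\n", stderr);
+*/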
+
+
+/*
+ Test a key.
+
+ This may be used either for a public or a secret key to see whether
+ the internal structure is okay.
+
+  Returns: 0 or an error code.
+
+ NOTE: We currently support only secret key checking. */
+gcry_err_code_t
+_gcry_pk_testkey (gcry_sexp_t s_key)
+{
+ gcry_err_code_t rc;
+ gcry_pk_spec_t *spec;
+ gcry_sexp_t keyparms;
+
+ rc = spec_from_sexp (s_key, 1, &spec, &keyparms);
+ if (rc)
+ goto leave;
+
+ if (spec->check_secret_key)
+ rc = spec->check_secret_key (keyparms);
+ else
+ rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+ sexp_release (keyparms);
+ return rc;
+}
+
+
+/*
+ Create a public key pair and return it in r_key.
+ How the key is created depends on s_parms:
+ (genkey
+ (algo
+ (parameter_name_1 ....)
+ ....
+ (parameter_name_n ....)
+ ))
+ The key is returned in a format depending on the
+  algorithm.  Both the public and the private key are returned
+  and optionally some additional information.
+ For elgamal we return this structure:
+ (key-data
+ (public-key
+ (elg
+ (p <mpi>)
+ (g <mpi>)
+ (y <mpi>)
+ )
+ )
+ (private-key
+ (elg
+ (p <mpi>)
+ (g <mpi>)
+ (y <mpi>)
+ (x <mpi>)
+ )
+ )
+ (misc-key-info
+ (pm1-factors n1 n2 ... nn)
+ ))
+ */
+gcry_err_code_t
+_gcry_pk_genkey (gcry_sexp_t *r_key, gcry_sexp_t s_parms)
+{
+ gcry_pk_spec_t *spec = NULL;
+ gcry_sexp_t list = NULL;
+ gcry_sexp_t l2 = NULL;
+ char *name = NULL;
+ gcry_err_code_t rc;
+
+ *r_key = NULL;
+
+ list = sexp_find_token (s_parms, "genkey", 0);
+ if (!list)
+ {
+ rc = GPG_ERR_INV_OBJ; /* Does not contain genkey data. */
+ goto leave;
+ }
+
+ l2 = sexp_cadr (list);
+ sexp_release (list);
+ list = l2;
+ l2 = NULL;
+ if (! list)
+ {
+ rc = GPG_ERR_NO_OBJ; /* No cdr for the genkey. */
+ goto leave;
+ }
+
+ name = _gcry_sexp_nth_string (list, 0);
+ if (!name)
+ {
+ rc = GPG_ERR_INV_OBJ; /* Algo string missing. */
+ goto leave;
+ }
+
+ spec = spec_from_name (name);
+ xfree (name);
+ name = NULL;
+ if (!spec)
+ {
+ rc = GPG_ERR_PUBKEY_ALGO; /* Unknown algorithm. */
+ goto leave;
+ }
+
+ if (spec->generate)
+ rc = spec->generate (list, r_key);
+ else
+ rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+ sexp_release (list);
+ xfree (name);
+ sexp_release (l2);
+
+ return rc;
+}
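+
+/* A minimal sketch of S_PARMS for a 2048 bit RSA key, built through the
+   public API; the returned R_KEY then has the (key-data ...) shape
+   shown above.  Error handling is omitted.
+
+     gcry_sexp_t parms, key;
+     gcry_sexp_build (&parms, NULL, "(genkey (rsa (nbits 4:2048)))");
+     gcry_pk_genkey (&key, parms);
+     gcry_sexp_release (parms);
+*/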
+
+
+/*
+  Get the number of bits from the public key.
+
+  Hmmm: Should we really have this function or is it better to have a
+  more general function to retrieve different properties of the key? */
+unsigned int
+_gcry_pk_get_nbits (gcry_sexp_t key)
+{
+ gcry_pk_spec_t *spec;
+ gcry_sexp_t parms;
+ unsigned int nbits;
+
+ /* Parsing KEY might be considered too much overhead. For example
+ for RSA we would only need to look at P and stop parsing right
+     away.  However, with ECC things are more complicated in that only
+ a curve name might be specified. Thus we need to tear the sexp
+ apart. */
+
+ if (spec_from_sexp (key, 0, &spec, &parms))
+ return 0; /* Error - 0 is a suitable indication for that. */
+
+ nbits = spec->get_nbits (parms);
+ sexp_release (parms);
+ return nbits;
+}
+
+
+/* Return the so called KEYGRIP which is the SHA-1 hash of the public
+ key parameters expressed in a way depending on the algorithm.
+
+   ARRAY must either be 20 bytes long or NULL; in the latter case a
+   newly allocated 20 byte array is returned.  On success ARRAY or the
+   allocated buffer is returned; NULL indicates an error, which is most
+   likely an unknown algorithm.  The function accepts public or secret
+   keys. */
+unsigned char *
+_gcry_pk_get_keygrip (gcry_sexp_t key, unsigned char *array)
+{
+ gcry_sexp_t list = NULL;
+ gcry_sexp_t l2 = NULL;
+ gcry_pk_spec_t *spec = NULL;
+ const char *s;
+ char *name = NULL;
+ int idx;
+ const char *elems;
+ gcry_md_hd_t md = NULL;
+ int okay = 0;
+
+ /* Check that the first element is valid. */
+ list = sexp_find_token (key, "public-key", 0);
+ if (! list)
+ list = sexp_find_token (key, "private-key", 0);
+ if (! list)
+ list = sexp_find_token (key, "protected-private-key", 0);
+ if (! list)
+ list = sexp_find_token (key, "shadowed-private-key", 0);
+ if (! list)
+ return NULL; /* No public- or private-key object. */
+
+ l2 = sexp_cadr (list);
+ sexp_release (list);
+ list = l2;
+ l2 = NULL;
+
+ name = _gcry_sexp_nth_string (list, 0);
+ if (!name)
+ goto fail; /* Invalid structure of object. */
+
+ spec = spec_from_name (name);
+ if (!spec)
+ goto fail; /* Unknown algorithm. */
+
+ elems = spec->elements_grip;
+ if (!elems)
+ goto fail; /* No grip parameter. */
+
+ if (_gcry_md_open (&md, GCRY_MD_SHA1, 0))
+ goto fail;
+
+ if (spec->comp_keygrip)
+ {
+ /* Module specific method to compute a keygrip. */
+ if (spec->comp_keygrip (md, list))
+ goto fail;
+ }
+ else
+ {
+ /* Generic method to compute a keygrip. */
+ for (idx = 0, s = elems; *s; s++, idx++)
+ {
+ const char *data;
+ size_t datalen;
+ char buf[30];
+
+ l2 = sexp_find_token (list, s, 1);
+ if (! l2)
+ goto fail;
+ data = sexp_nth_data (l2, 1, &datalen);
+ if (! data)
+ goto fail;
+
+ snprintf (buf, sizeof buf, "(1:%c%u:", *s, (unsigned int)datalen);
+ _gcry_md_write (md, buf, strlen (buf));
+ _gcry_md_write (md, data, datalen);
+ sexp_release (l2);
+ l2 = NULL;
+ _gcry_md_write (md, ")", 1);
+ }
+ }
+
+ if (!array)
+ {
+ array = xtrymalloc (20);
+ if (! array)
+ goto fail;
+ }
+
+ memcpy (array, _gcry_md_read (md, GCRY_MD_SHA1), 20);
+ okay = 1;
+
+ fail:
+ xfree (name);
+ sexp_release (l2);
+ _gcry_md_close (md);
+ sexp_release (list);
+ return okay? array : NULL;
+}
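+
+/* As a worked example of the generic method above: a grip element "n"
+   whose value is 256 octets long contributes the bytes
+
+     "(1:n256:" <256 value octets> ")"
+
+   to the SHA-1 computation, and the keygrip is the digest over all grip
+   elements encoded this way.  A caller obtains it through the public
+   wrapper, e.g.
+
+     unsigned char grip[20];
+     if (!gcry_pk_get_keygrip (key, grip))
+       fputs ("cannot compute keygrip\n", stderr);
+*/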
+
+
+
+const char *
+_gcry_pk_get_curve (gcry_sexp_t key, int iterator, unsigned int *r_nbits)
+{
+ const char *result = NULL;
+ gcry_pk_spec_t *spec;
+ gcry_sexp_t keyparms = NULL;
+
+ if (r_nbits)
+ *r_nbits = 0;
+
+ if (key)
+ {
+ iterator = 0;
+
+ if (spec_from_sexp (key, 0, &spec, &keyparms))
+ return NULL;
+ }
+ else
+ {
+ spec = spec_from_name ("ecc");
+ if (!spec)
+ return NULL;
+ }
+
+ if (spec->get_curve)
+ result = spec->get_curve (keyparms, iterator, r_nbits);
+
+ sexp_release (keyparms);
+ return result;
+}
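+
+/* A minimal sketch: with KEY set to NULL the iterator enumerates the
+   curves known to the ECC module, e.g. through the public wrapper:
+
+     unsigned int nbits;
+     const char *name;
+     int i;
+     for (i = 0; (name = gcry_pk_get_curve (NULL, i, &nbits)); i++)
+       printf ("%s %u\n", name, nbits);
+*/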
+
+
+
+gcry_sexp_t
+_gcry_pk_get_param (int algo, const char *name)
+{
+ gcry_sexp_t result = NULL;
+ gcry_pk_spec_t *spec = NULL;
+
+ algo = map_algo (algo);
+
+ if (algo != GCRY_PK_ECC)
+ return NULL;
+
+ spec = spec_from_name ("ecc");
+ if (spec)
+ {
+ if (spec && spec->get_curve_param)
+ result = spec->get_curve_param (name);
+ }
+ return result;
+}
+
+
+
+gcry_err_code_t
+_gcry_pk_ctl (int cmd, void *buffer, size_t buflen)
+{
+ gcry_err_code_t rc = 0;
+
+ switch (cmd)
+ {
+ case GCRYCTL_DISABLE_ALGO:
+ /* This one expects a buffer pointing to an integer with the
+ algo number. */
+ if ((! buffer) || (buflen != sizeof (int)))
+ rc = GPG_ERR_INV_ARG;
+ else
+ disable_pubkey_algo (*((int *) buffer));
+ break;
+
+ default:
+ rc = GPG_ERR_INV_OP;
+ }
+
+ return rc;
+}
+
+
+/* Return information about the given algorithm
+
+ WHAT selects the kind of information returned:
+
+ GCRYCTL_TEST_ALGO:
+ Returns 0 when the specified algorithm is available for use.
+ Buffer must be NULL, nbytes may have the address of a variable
+ with the required usage of the algorithm. It may be 0 for don't
+      care or a combination of the GCRY_PK_USAGE_xxx flags.
+
+ GCRYCTL_GET_ALGO_USAGE:
+ Return the usage flags for the given algo. An invalid algo
+ returns 0. Disabled algos are ignored here because we
+ only want to know whether the algo is at all capable of
+ the usage.
+
+ Note: Because this function is in most cases used to return an
+ integer value, we can make it easier for the caller to just look at
+ the return value. The caller will in all cases consult the value
+   and thereby detect whether an error occurred or not (e.g. while
+   checking the block size). */
+gcry_err_code_t
+_gcry_pk_algo_info (int algorithm, int what, void *buffer, size_t *nbytes)
+{
+ gcry_err_code_t rc = 0;
+
+ switch (what)
+ {
+ case GCRYCTL_TEST_ALGO:
+ {
+ int use = nbytes ? *nbytes : 0;
+ if (buffer)
+ rc = GPG_ERR_INV_ARG;
+ else if (check_pubkey_algo (algorithm, use))
+ rc = GPG_ERR_PUBKEY_ALGO;
+ break;
+ }
+
+ case GCRYCTL_GET_ALGO_USAGE:
+ {
+ gcry_pk_spec_t *spec;
+
+ spec = spec_from_algo (algorithm);
+ *nbytes = spec? spec->use : 0;
+ break;
+ }
+
+ case GCRYCTL_GET_ALGO_NPKEY:
+ {
+ /* FIXME? */
+ int npkey = pubkey_get_npkey (algorithm);
+ *nbytes = npkey;
+ break;
+ }
+ case GCRYCTL_GET_ALGO_NSKEY:
+ {
+ /* FIXME? */
+ int nskey = pubkey_get_nskey (algorithm);
+ *nbytes = nskey;
+ break;
+ }
+ case GCRYCTL_GET_ALGO_NSIGN:
+ {
+ /* FIXME? */
+ int nsign = pubkey_get_nsig (algorithm);
+ *nbytes = nsign;
+ break;
+ }
+ case GCRYCTL_GET_ALGO_NENCR:
+ {
+ /* FIXME? */
+ int nencr = pubkey_get_nenc (algorithm);
+ *nbytes = nencr;
+ break;
+ }
+
+ default:
+ rc = GPG_ERR_INV_OP;
+ }
+
+ return rc;
+}
+
+
+/* Return an S-expression representing the context CTX. Depending on
+ the state of that context, the S-expression may either be a public
+ key, a private key or any other object used with public key
+ operations. On success a new S-expression is stored at R_SEXP and
+   0 is returned; on error NULL is stored there and an error code is
+ returned. MODE is either 0 or one of the GCRY_PK_GET_xxx values.
+
+   As of now it only supports certain ECC operations because a context
+ object is right now only defined for ECC. Over time this function
+ will be extended to cover more algorithms. Note also that the name
+ of the function is gcry_pubkey_xxx and not gcry_pk_xxx. The idea
+ is that we will eventually provide variants of the existing
+ gcry_pk_xxx functions which will take a context parameter. */
+gcry_err_code_t
+_gcry_pubkey_get_sexp (gcry_sexp_t *r_sexp, int mode, gcry_ctx_t ctx)
+{
+ mpi_ec_t ec;
+
+ if (!r_sexp)
+ return GPG_ERR_INV_VALUE;
+ *r_sexp = NULL;
+ switch (mode)
+ {
+ case 0:
+ case GCRY_PK_GET_PUBKEY:
+ case GCRY_PK_GET_SECKEY:
+ break;
+ default:
+ return GPG_ERR_INV_VALUE;
+ }
+ if (!ctx)
+ return GPG_ERR_NO_CRYPT_CTX;
+
+ ec = _gcry_ctx_find_pointer (ctx, CONTEXT_TYPE_EC);
+ if (ec)
+ return _gcry_pk_ecc_get_sexp (r_sexp, mode, ec);
+
+ return GPG_ERR_WRONG_CRYPT_CTX;
+}
+
+
+
+/* Explicitly initialize this module. */
+gcry_err_code_t
+_gcry_pk_init (void)
+{
+ if (fips_mode())
+ {
+ /* disable algorithms that are disallowed in fips */
+ int idx;
+ gcry_pk_spec_t *spec;
+
+ for (idx = 0; (spec = pubkey_list[idx]); idx++)
+ if (!spec->flags.fips)
+ spec->flags.disabled = 1;
+ }
+
+ return 0;
+}
+
+
+/* Run the selftests for pubkey algorithm ALGO with optional reporting
+ function REPORT. */
+gpg_error_t
+_gcry_pk_selftest (int algo, int extended, selftest_report_func_t report)
+{
+ gcry_err_code_t ec;
+ gcry_pk_spec_t *spec;
+
+ algo = map_algo (algo);
+ spec = spec_from_algo (algo);
+ if (spec && !spec->flags.disabled && spec->selftest)
+ ec = spec->selftest (algo, extended, report);
+ else
+ {
+ ec = GPG_ERR_PUBKEY_ALGO;
+ /* Fixme: We need to change the report function to allow passing
+ of an encryption mode (e.g. pkcs1, ecdsa, or ecdh). */
+ if (report)
+ report ("pubkey", algo, "module",
+ spec && !spec->flags.disabled?
+ "no selftest available" :
+ spec? "algorithm disabled" :
+ "algorithm not found");
+ }
+
+ return gpg_error (ec);
+}
diff --git a/comm/third_party/libgcrypt/cipher/rfc2268.c b/comm/third_party/libgcrypt/cipher/rfc2268.c
new file mode 100644
index 0000000000..f018b64038
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rfc2268.c
@@ -0,0 +1,378 @@
+/* rfc2268.c - The cipher described in rfc2268; aka Ron's Cipher 2.
+ * Copyright (C) 2003 Nikos Mavroyanopoulos
+ * Copyright (C) 2004 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+/* This implementation was written by Nikos Mavroyanopoulos for GNUTLS
+ * as a Libgcrypt module (gnutls/lib/x509/rc2.c) and later adapted for
+ * direct use by Libgcrypt by Werner Koch. This implementation is
+ * only useful for pkcs#12 decryption.
+ *
+ * The implementation here is based on Peter Gutmann's RRC.2 paper.
+ */
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "g10lib.h"
+#include "types.h"
+#include "cipher.h"
+#include "cipher-internal.h"
+
+#define RFC2268_BLOCKSIZE 8
+
+typedef struct
+{
+ u16 S[64];
+} RFC2268_context;
+
+static const unsigned char rfc2268_sbox[] = {
+ 217, 120, 249, 196, 25, 221, 181, 237,
+ 40, 233, 253, 121, 74, 160, 216, 157,
+ 198, 126, 55, 131, 43, 118, 83, 142,
+ 98, 76, 100, 136, 68, 139, 251, 162,
+ 23, 154, 89, 245, 135, 179, 79, 19,
+ 97, 69, 109, 141, 9, 129, 125, 50,
+ 189, 143, 64, 235, 134, 183, 123, 11,
+ 240, 149, 33, 34, 92, 107, 78, 130,
+ 84, 214, 101, 147, 206, 96, 178, 28,
+ 115, 86, 192, 20, 167, 140, 241, 220,
+ 18, 117, 202, 31, 59, 190, 228, 209,
+ 66, 61, 212, 48, 163, 60, 182, 38,
+ 111, 191, 14, 218, 70, 105, 7, 87,
+ 39, 242, 29, 155, 188, 148, 67, 3,
+ 248, 17, 199, 246, 144, 239, 62, 231,
+ 6, 195, 213, 47, 200, 102, 30, 215,
+ 8, 232, 234, 222, 128, 82, 238, 247,
+ 132, 170, 114, 172, 53, 77, 106, 42,
+ 150, 26, 210, 113, 90, 21, 73, 116,
+ 75, 159, 208, 94, 4, 24, 164, 236,
+ 194, 224, 65, 110, 15, 81, 203, 204,
+ 36, 145, 175, 80, 161, 244, 112, 57,
+ 153, 124, 58, 133, 35, 184, 180, 122,
+ 252, 2, 54, 91, 37, 85, 151, 49,
+ 45, 93, 250, 152, 227, 138, 146, 174,
+ 5, 223, 41, 16, 103, 108, 186, 201,
+ 211, 0, 230, 207, 225, 158, 168, 44,
+ 99, 22, 1, 63, 88, 226, 137, 169,
+ 13, 56, 52, 27, 171, 51, 255, 176,
+ 187, 72, 12, 95, 185, 177, 205, 46,
+ 197, 243, 219, 71, 229, 165, 156, 119,
+ 10, 166, 32, 104, 254, 127, 193, 173
+};
+
+#define rotl16(x,n) (((x) << ((u16)(n))) | ((x) >> (16 - (u16)(n))))
+#define rotr16(x,n) (((x) >> ((u16)(n))) | ((x) << (16 - (u16)(n))))
+
+static const char *selftest (void);
+
+
+static void
+do_encrypt (void *context, unsigned char *outbuf, const unsigned char *inbuf)
+{
+ RFC2268_context *ctx = context;
+ register int i, j;
+ u16 word0 = 0, word1 = 0, word2 = 0, word3 = 0;
+
+ word0 = (word0 << 8) | inbuf[1];
+ word0 = (word0 << 8) | inbuf[0];
+ word1 = (word1 << 8) | inbuf[3];
+ word1 = (word1 << 8) | inbuf[2];
+ word2 = (word2 << 8) | inbuf[5];
+ word2 = (word2 << 8) | inbuf[4];
+ word3 = (word3 << 8) | inbuf[7];
+ word3 = (word3 << 8) | inbuf[6];
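+  /* The 16-bit words are assembled little endian; for example
+     inbuf[0] = 0x34 and inbuf[1] = 0x12 yield word0 = 0x1234.  */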
+
+ for (i = 0; i < 16; i++)
+ {
+ j = i * 4;
+ /* For some reason I cannot combine those steps. */
+ word0 += (word1 & ~word3) + (word2 & word3) + ctx->S[j];
+ word0 = rotl16(word0, 1);
+
+ word1 += (word2 & ~word0) + (word3 & word0) + ctx->S[j + 1];
+ word1 = rotl16(word1, 2);
+
+ word2 += (word3 & ~word1) + (word0 & word1) + ctx->S[j + 2];
+ word2 = rotl16(word2, 3);
+
+ word3 += (word0 & ~word2) + (word1 & word2) + ctx->S[j + 3];
+ word3 = rotl16(word3, 5);
+
+ if (i == 4 || i == 10)
+ {
+ word0 += ctx->S[word3 & 63];
+ word1 += ctx->S[word0 & 63];
+ word2 += ctx->S[word1 & 63];
+ word3 += ctx->S[word2 & 63];
+ }
+
+ }
+
+ outbuf[0] = word0 & 255;
+ outbuf[1] = word0 >> 8;
+ outbuf[2] = word1 & 255;
+ outbuf[3] = word1 >> 8;
+ outbuf[4] = word2 & 255;
+ outbuf[5] = word2 >> 8;
+ outbuf[6] = word3 & 255;
+ outbuf[7] = word3 >> 8;
+}
+
+static unsigned int
+encrypt_block (void *context, unsigned char *outbuf, const unsigned char *inbuf)
+{
+ do_encrypt (context, outbuf, inbuf);
+ return /*burn_stack*/ (4 * sizeof(void *) + sizeof(void *) + sizeof(u32) * 4);
+}
+
+static void
+do_decrypt (void *context, unsigned char *outbuf, const unsigned char *inbuf)
+{
+ RFC2268_context *ctx = context;
+ register int i, j;
+ u16 word0 = 0, word1 = 0, word2 = 0, word3 = 0;
+
+ word0 = (word0 << 8) | inbuf[1];
+ word0 = (word0 << 8) | inbuf[0];
+ word1 = (word1 << 8) | inbuf[3];
+ word1 = (word1 << 8) | inbuf[2];
+ word2 = (word2 << 8) | inbuf[5];
+ word2 = (word2 << 8) | inbuf[4];
+ word3 = (word3 << 8) | inbuf[7];
+ word3 = (word3 << 8) | inbuf[6];
+
+ for (i = 15; i >= 0; i--)
+ {
+ j = i * 4;
+
+ word3 = rotr16(word3, 5);
+ word3 -= (word0 & ~word2) + (word1 & word2) + ctx->S[j + 3];
+
+ word2 = rotr16(word2, 3);
+ word2 -= (word3 & ~word1) + (word0 & word1) + ctx->S[j + 2];
+
+ word1 = rotr16(word1, 2);
+ word1 -= (word2 & ~word0) + (word3 & word0) + ctx->S[j + 1];
+
+ word0 = rotr16(word0, 1);
+ word0 -= (word1 & ~word3) + (word2 & word3) + ctx->S[j];
+
+ if (i == 5 || i == 11)
+ {
+ word3 = word3 - ctx->S[word2 & 63];
+ word2 = word2 - ctx->S[word1 & 63];
+ word1 = word1 - ctx->S[word0 & 63];
+ word0 = word0 - ctx->S[word3 & 63];
+ }
+
+ }
+
+ outbuf[0] = word0 & 255;
+ outbuf[1] = word0 >> 8;
+ outbuf[2] = word1 & 255;
+ outbuf[3] = word1 >> 8;
+ outbuf[4] = word2 & 255;
+ outbuf[5] = word2 >> 8;
+ outbuf[6] = word3 & 255;
+ outbuf[7] = word3 >> 8;
+}
+
+static unsigned int
+decrypt_block (void *context, unsigned char *outbuf, const unsigned char *inbuf)
+{
+ do_decrypt (context, outbuf, inbuf);
+ return /*burn_stack*/ (4 * sizeof(void *) + sizeof(void *) + sizeof(u32) * 4);
+}
+
+
+static gpg_err_code_t
+setkey_core (void *context, const unsigned char *key, unsigned int keylen, int with_phase2)
+{
+ static int initialized;
+ static const char *selftest_failed;
+ RFC2268_context *ctx = context;
+ unsigned int i;
+ unsigned char *S, x;
+ int len;
+ int bits = keylen * 8;
+
+ if (!initialized)
+ {
+ initialized = 1;
+ selftest_failed = selftest ();
+ if (selftest_failed)
+ log_error ("RFC2268 selftest failed (%s).\n", selftest_failed);
+ }
+ if (selftest_failed)
+ return GPG_ERR_SELFTEST_FAILED;
+
+ if (keylen < 40 / 8) /* We want at least 40 bits. */
+ return GPG_ERR_INV_KEYLEN;
+
+ S = (unsigned char *) ctx->S;
+
+ for (i = 0; i < keylen; i++)
+ S[i] = key[i];
+
+ for (i = keylen; i < 128; i++)
+ S[i] = rfc2268_sbox[(S[i - keylen] + S[i - 1]) & 255];
+
+ S[0] = rfc2268_sbox[S[0]];
+
+ /* Phase 2 - reduce effective key size to "bits". This was not
+ * discussed in Gutmann's paper. I've copied that from the public
+ * domain code posted in sci.crypt. */
+ if (with_phase2)
+ {
+ len = (bits + 7) >> 3;
+ i = 128 - len;
+ x = rfc2268_sbox[S[i] & (255 >> (7 & -bits))];
+ S[i] = x;
+
+ while (i--)
+ {
+ x = rfc2268_sbox[x ^ S[i + len]];
+ S[i] = x;
+ }
+ }
+
+ /* Make the expanded key, endian independent. */
+ for (i = 0; i < 64; i++)
+ ctx->S[i] = ( (u16) S[i * 2] | (((u16) S[i * 2 + 1]) << 8));
+
+ return 0;
+}
+
+static gpg_err_code_t
+do_setkey (void *context, const unsigned char *key, unsigned int keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ (void)bulk_ops;
+ return setkey_core (context, key, keylen, 1);
+}
+
+static const char *
+selftest (void)
+{
+ RFC2268_context ctx;
+ unsigned char scratch[16];
+
+ /* Test vectors from Peter Gutmann's paper. */
+ static unsigned char key_1[] =
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ };
+ static unsigned char plaintext_1[] =
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+ static const unsigned char ciphertext_1[] =
+ { 0x1C, 0x19, 0x8A, 0x83, 0x8D, 0xF0, 0x28, 0xB7 };
+
+ static unsigned char key_2[] =
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
+ };
+ static unsigned char plaintext_2[] =
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+ static unsigned char ciphertext_2[] =
+ { 0x50, 0xDC, 0x01, 0x62, 0xBD, 0x75, 0x7F, 0x31 };
+
+ /* This one was checked against libmcrypt's RFC2268. */
+ static unsigned char key_3[] =
+ { 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ };
+ static unsigned char plaintext_3[] =
+ { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+ static unsigned char ciphertext_3[] =
+ { 0x8f, 0xd1, 0x03, 0x89, 0x33, 0x6b, 0xf9, 0x5e };
+
+
+ /* First test. */
+ setkey_core (&ctx, key_1, sizeof(key_1), 0);
+ do_encrypt (&ctx, scratch, plaintext_1);
+
+ if (memcmp (scratch, ciphertext_1, sizeof(ciphertext_1)))
+ return "RFC2268 encryption test 1 failed.";
+
+ setkey_core (&ctx, key_1, sizeof(key_1), 0);
+ do_decrypt (&ctx, scratch, scratch);
+ if (memcmp (scratch, plaintext_1, sizeof(plaintext_1)))
+ return "RFC2268 decryption test 1 failed.";
+
+ /* Second test. */
+ setkey_core (&ctx, key_2, sizeof(key_2), 0);
+ do_encrypt (&ctx, scratch, plaintext_2);
+ if (memcmp (scratch, ciphertext_2, sizeof(ciphertext_2)))
+ return "RFC2268 encryption test 2 failed.";
+
+ setkey_core (&ctx, key_2, sizeof(key_2), 0);
+ do_decrypt (&ctx, scratch, scratch);
+ if (memcmp (scratch, plaintext_2, sizeof(plaintext_2)))
+ return "RFC2268 decryption test 2 failed.";
+
+ /* Third test. */
+ setkey_core(&ctx, key_3, sizeof(key_3), 0);
+ do_encrypt(&ctx, scratch, plaintext_3);
+
+ if (memcmp(scratch, ciphertext_3, sizeof(ciphertext_3)))
+ return "RFC2268 encryption test 3 failed.";
+
+ setkey_core (&ctx, key_3, sizeof(key_3), 0);
+ do_decrypt (&ctx, scratch, scratch);
+ if (memcmp(scratch, plaintext_3, sizeof(plaintext_3)))
+ return "RFC2268 decryption test 3 failed.";
+
+ return NULL;
+}
+
+
+
+static gcry_cipher_oid_spec_t oids_rfc2268_40[] =
+ {
+ /*{ "1.2.840.113549.3.2", GCRY_CIPHER_MODE_CBC },*/
+ /* pbeWithSHAAnd40BitRC2_CBC */
+ { "1.2.840.113549.1.12.1.6", GCRY_CIPHER_MODE_CBC },
+ { NULL }
+ };
+
+static gcry_cipher_oid_spec_t oids_rfc2268_128[] =
+ {
+ /* pbeWithSHAAnd128BitRC2_CBC */
+ { "1.2.840.113549.1.12.1.5", GCRY_CIPHER_MODE_CBC },
+ { NULL }
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_40 =
+ {
+ GCRY_CIPHER_RFC2268_40, {0, 0},
+ "RFC2268_40", NULL, oids_rfc2268_40,
+ RFC2268_BLOCKSIZE, 40, sizeof(RFC2268_context),
+ do_setkey, encrypt_block, decrypt_block
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_128 =
+ {
+ GCRY_CIPHER_RFC2268_128, {0, 0},
+ "RFC2268_128", NULL, oids_rfc2268_128,
+ RFC2268_BLOCKSIZE, 128, sizeof(RFC2268_context),
+ do_setkey, encrypt_block, decrypt_block
+ };
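+
+/* A minimal usage sketch, assuming KEY (5 octets) and IV (8 octets) have
+   already been derived from a PKCS#12 passphrase and BUF holds BUFLEN
+   bytes of ciphertext; this selects the 40 bit spec above through the
+   regular cipher API:
+
+     gcry_cipher_hd_t hd;
+     gcry_cipher_open (&hd, GCRY_CIPHER_RFC2268_40, GCRY_CIPHER_MODE_CBC, 0);
+     gcry_cipher_setkey (hd, key, 5);
+     gcry_cipher_setiv (hd, iv, 8);
+     gcry_cipher_decrypt (hd, buf, buflen, NULL, 0);
+     gcry_cipher_close (hd);
+*/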
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-aarch64.S b/comm/third_party/libgcrypt/cipher/rijndael-aarch64.S
new file mode 100644
index 0000000000..e77dd4e0b8
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-aarch64.S
@@ -0,0 +1,514 @@
+/* rijndael-aarch64.S - ARMv8/AArch64 assembly implementation of AES cipher
+ *
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__)
+#ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+
+.text
+
+/* register macros */
+#define CTX x0
+#define RDST x1
+#define RSRC x2
+#define NROUNDS w3
+#define RTAB x4
+#define RMASK w5
+
+#define RA w8
+#define RB w9
+#define RC w10
+#define RD w11
+
+#define RNA w12
+#define RNB w13
+#define RNC w14
+#define RND w15
+
+#define RT0 w6
+#define RT1 w7
+#define RT2 w16
+#define xRT0 x6
+#define xRT1 x7
+#define xRT2 x16
+
+#define xw8 x8
+#define xw9 x9
+#define xw10 x10
+#define xw11 x11
+
+#define xw12 x12
+#define xw13 x13
+#define xw14 x14
+#define xw15 x15
+
+/***********************************************************************
+ * ARMv8/AArch64 assembly implementation of the AES cipher
+ ***********************************************************************/
+#define preload_first_key(round, ra) \
+ ldr ra, [CTX, #(((round) * 16) + 0 * 4)];
+
+#define dummy(round, ra) /* nothing */
+
+#define addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+ ldp rna, rnb, [CTX]; \
+ ldp rnc, rnd, [CTX, #8]; \
+ eor ra, ra, rna; \
+ eor rb, rb, rnb; \
+ eor rc, rc, rnc; \
+ preload_key(1, rna); \
+ eor rd, rd, rnd;
+
+#define do_encround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+ ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \
+ \
+ and RT0, RMASK, ra, lsl#2; \
+ ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \
+ and RT1, RMASK, ra, lsr#(8 - 2); \
+ ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \
+ and RT2, RMASK, ra, lsr#(16 - 2); \
+ ldr RT0, [RTAB, xRT0]; \
+ and ra, RMASK, ra, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, xRT1]; \
+ eor rna, rna, RT0; \
+ ldr RT2, [RTAB, xRT2]; \
+ and RT0, RMASK, rd, lsl#2; \
+ ldr ra, [RTAB, x##ra]; \
+ \
+ eor rnd, rnd, RT1, ror #24; \
+ and RT1, RMASK, rd, lsr#(8 - 2); \
+ eor rnc, rnc, RT2, ror #16; \
+ and RT2, RMASK, rd, lsr#(16 - 2); \
+ eor rnb, rnb, ra, ror #8; \
+ ldr RT0, [RTAB, xRT0]; \
+ and rd, RMASK, rd, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, xRT1]; \
+ eor rnd, rnd, RT0; \
+ ldr RT2, [RTAB, xRT2]; \
+ and RT0, RMASK, rc, lsl#2; \
+ ldr rd, [RTAB, x##rd]; \
+ \
+ eor rnc, rnc, RT1, ror #24; \
+ and RT1, RMASK, rc, lsr#(8 - 2); \
+ eor rnb, rnb, RT2, ror #16; \
+ and RT2, RMASK, rc, lsr#(16 - 2); \
+ eor rna, rna, rd, ror #8; \
+ ldr RT0, [RTAB, xRT0]; \
+ and rc, RMASK, rc, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, xRT1]; \
+ eor rnc, rnc, RT0; \
+ ldr RT2, [RTAB, xRT2]; \
+ and RT0, RMASK, rb, lsl#2; \
+ ldr rc, [RTAB, x##rc]; \
+ \
+ eor rnb, rnb, RT1, ror #24; \
+ and RT1, RMASK, rb, lsr#(8 - 2); \
+ eor rna, rna, RT2, ror #16; \
+ and RT2, RMASK, rb, lsr#(16 - 2); \
+ eor rnd, rnd, rc, ror #8; \
+ ldr RT0, [RTAB, xRT0]; \
+ and rb, RMASK, rb, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, xRT1]; \
+ eor rnb, rnb, RT0; \
+ ldr RT2, [RTAB, xRT2]; \
+ eor rna, rna, RT1, ror #24; \
+ ldr rb, [RTAB, x##rb]; \
+ \
+ eor rnd, rnd, RT2, ror #16; \
+ preload_key((next_r) + 1, ra); \
+ eor rnc, rnc, rb, ror #8;
+
+#define do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ and RT0, RMASK, ra, lsl#2; \
+ and RT1, RMASK, ra, lsr#(8 - 2); \
+ and RT2, RMASK, ra, lsr#(16 - 2); \
+ ldrb rna, [RTAB, xRT0]; \
+ and ra, RMASK, ra, lsr#(24 - 2); \
+ ldrb rnd, [RTAB, xRT1]; \
+ and RT0, RMASK, rd, lsl#2; \
+ ldrb rnc, [RTAB, xRT2]; \
+ ror rnd, rnd, #24; \
+ ldrb rnb, [RTAB, x##ra]; \
+ and RT1, RMASK, rd, lsr#(8 - 2); \
+ ror rnc, rnc, #16; \
+ and RT2, RMASK, rd, lsr#(16 - 2); \
+ ror rnb, rnb, #8; \
+ ldrb RT0, [RTAB, xRT0]; \
+ and rd, RMASK, rd, lsr#(24 - 2); \
+ ldrb RT1, [RTAB, xRT1]; \
+ \
+ orr rnd, rnd, RT0; \
+ ldrb RT2, [RTAB, xRT2]; \
+ and RT0, RMASK, rc, lsl#2; \
+ ldrb rd, [RTAB, x##rd]; \
+ orr rnc, rnc, RT1, ror #24; \
+ and RT1, RMASK, rc, lsr#(8 - 2); \
+ orr rnb, rnb, RT2, ror #16; \
+ and RT2, RMASK, rc, lsr#(16 - 2); \
+ orr rna, rna, rd, ror #8; \
+ ldrb RT0, [RTAB, xRT0]; \
+ and rc, RMASK, rc, lsr#(24 - 2); \
+ ldrb RT1, [RTAB, xRT1]; \
+ \
+ orr rnc, rnc, RT0; \
+ ldrb RT2, [RTAB, xRT2]; \
+ and RT0, RMASK, rb, lsl#2; \
+ ldrb rc, [RTAB, x##rc]; \
+ orr rnb, rnb, RT1, ror #24; \
+ and RT1, RMASK, rb, lsr#(8 - 2); \
+ orr rna, rna, RT2, ror #16; \
+ ldrb RT0, [RTAB, xRT0]; \
+ and RT2, RMASK, rb, lsr#(16 - 2); \
+ ldrb RT1, [RTAB, xRT1]; \
+ orr rnd, rnd, rc, ror #8; \
+ ldrb RT2, [RTAB, xRT2]; \
+ and rb, RMASK, rb, lsr#(24 - 2); \
+ ldrb rb, [RTAB, x##rb]; \
+ \
+ orr rnb, rnb, RT0; \
+ orr rna, rna, RT1, ror #24; \
+ orr rnd, rnd, RT2, ror #16; \
+ orr rnc, rnc, rb, ror #8;
+
+#define firstencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); \
+ do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key);
+
+#define encround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+ do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key);
+
+#define lastencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ add CTX, CTX, #(((round) + 1) * 16); \
+ add RTAB, RTAB, #1; \
+ do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \
+ addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy);
+
+.globl _gcry_aes_arm_encrypt_block
+ELF(.type _gcry_aes_arm_encrypt_block,%function;)
+
+_gcry_aes_arm_encrypt_block:
+ /* input:
+ * %x0: keysched, CTX
+ * %x1: dst
+ * %x2: src
+ * %w3: number of rounds.. 10, 12 or 14
+ * %x4: encryption table
+ */
+ CFI_STARTPROC();
+
+ /* read input block */
+
+ /* aligned load */
+ ldp RA, RB, [RSRC];
+ ldp RC, RD, [RSRC, #8];
+#ifndef __AARCH64EL__
+ rev RA, RA;
+ rev RB, RB;
+ rev RC, RC;
+ rev RD, RD;
+#endif
+
+ mov RMASK, #(0xff<<2);
+
+ firstencround(0, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+ encround(1, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ encround(2, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ encround(3, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ encround(4, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ encround(5, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ encround(6, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ encround(7, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+
+ cmp NROUNDS, #12;
+ bge .Lenc_not_128;
+
+ encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy);
+ lastencround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+.Lenc_done:
+
+ /* store output block */
+
+ /* aligned store */
+#ifndef __AARCH64EL__
+ rev RA, RA;
+ rev RB, RB;
+ rev RC, RC;
+ rev RD, RD;
+#endif
+ /* write output block */
+ stp RA, RB, [RDST];
+ stp RC, RD, [RDST, #8];
+
+ mov x0, #(0);
+ ret;
+
+.ltorg
+.Lenc_not_128:
+ beq .Lenc_192
+
+ encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ encround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ encround(12, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy);
+ lastencround(13, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+ b .Lenc_done;
+
+.ltorg
+.Lenc_192:
+ encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy);
+ lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+ b .Lenc_done;
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;)
+
+#define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \
+ ldr rnb, [CTX, #(((round) * 16) + 1 * 4)]; \
+ eor ra, ra, rna; \
+ ldr rnc, [CTX, #(((round) * 16) + 2 * 4)]; \
+ eor rb, rb, rnb; \
+ ldr rnd, [CTX, #(((round) * 16) + 3 * 4)]; \
+ eor rc, rc, rnc; \
+ preload_first_key((round) - 1, rna); \
+ eor rd, rd, rnd;
+
+#define do_decround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+ ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \
+ \
+ and RT0, RMASK, ra, lsl#2; \
+ ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \
+ and RT1, RMASK, ra, lsr#(8 - 2); \
+ ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \
+ and RT2, RMASK, ra, lsr#(16 - 2); \
+ ldr RT0, [RTAB, xRT0]; \
+ and ra, RMASK, ra, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, xRT1]; \
+ eor rna, rna, RT0; \
+ ldr RT2, [RTAB, xRT2]; \
+ and RT0, RMASK, rb, lsl#2; \
+ ldr ra, [RTAB, x##ra]; \
+ \
+ eor rnb, rnb, RT1, ror #24; \
+ and RT1, RMASK, rb, lsr#(8 - 2); \
+ eor rnc, rnc, RT2, ror #16; \
+ and RT2, RMASK, rb, lsr#(16 - 2); \
+ eor rnd, rnd, ra, ror #8; \
+ ldr RT0, [RTAB, xRT0]; \
+ and rb, RMASK, rb, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, xRT1]; \
+ eor rnb, rnb, RT0; \
+ ldr RT2, [RTAB, xRT2]; \
+ and RT0, RMASK, rc, lsl#2; \
+ ldr rb, [RTAB, x##rb]; \
+ \
+ eor rnc, rnc, RT1, ror #24; \
+ and RT1, RMASK, rc, lsr#(8 - 2); \
+ eor rnd, rnd, RT2, ror #16; \
+ and RT2, RMASK, rc, lsr#(16 - 2); \
+ eor rna, rna, rb, ror #8; \
+ ldr RT0, [RTAB, xRT0]; \
+ and rc, RMASK, rc, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, xRT1]; \
+ eor rnc, rnc, RT0; \
+ ldr RT2, [RTAB, xRT2]; \
+ and RT0, RMASK, rd, lsl#2; \
+ ldr rc, [RTAB, x##rc]; \
+ \
+ eor rnd, rnd, RT1, ror #24; \
+ and RT1, RMASK, rd, lsr#(8 - 2); \
+ eor rna, rna, RT2, ror #16; \
+ and RT2, RMASK, rd, lsr#(16 - 2); \
+ eor rnb, rnb, rc, ror #8; \
+ ldr RT0, [RTAB, xRT0]; \
+ and rd, RMASK, rd, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, xRT1]; \
+ eor rnd, rnd, RT0; \
+ ldr RT2, [RTAB, xRT2]; \
+ eor rna, rna, RT1, ror #24; \
+ ldr rd, [RTAB, x##rd]; \
+ \
+ eor rnb, rnb, RT2, ror #16; \
+ preload_key((next_r) - 1, ra); \
+ eor rnc, rnc, rd, ror #8;
+
+#define do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ and RT0, RMASK, ra; \
+ and RT1, RMASK, ra, lsr#8; \
+ and RT2, RMASK, ra, lsr#16; \
+ ldrb rna, [RTAB, xRT0]; \
+ lsr ra, ra, #24; \
+ ldrb rnb, [RTAB, xRT1]; \
+ and RT0, RMASK, rb; \
+ ldrb rnc, [RTAB, xRT2]; \
+ ror rnb, rnb, #24; \
+ ldrb rnd, [RTAB, x##ra]; \
+ and RT1, RMASK, rb, lsr#8; \
+ ror rnc, rnc, #16; \
+ and RT2, RMASK, rb, lsr#16; \
+ ror rnd, rnd, #8; \
+ ldrb RT0, [RTAB, xRT0]; \
+ lsr rb, rb, #24; \
+ ldrb RT1, [RTAB, xRT1]; \
+ \
+ orr rnb, rnb, RT0; \
+ ldrb RT2, [RTAB, xRT2]; \
+ and RT0, RMASK, rc; \
+ ldrb rb, [RTAB, x##rb]; \
+ orr rnc, rnc, RT1, ror #24; \
+ and RT1, RMASK, rc, lsr#8; \
+ orr rnd, rnd, RT2, ror #16; \
+ and RT2, RMASK, rc, lsr#16; \
+ orr rna, rna, rb, ror #8; \
+ ldrb RT0, [RTAB, xRT0]; \
+ lsr rc, rc, #24; \
+ ldrb RT1, [RTAB, xRT1]; \
+ \
+ orr rnc, rnc, RT0; \
+ ldrb RT2, [RTAB, xRT2]; \
+ and RT0, RMASK, rd; \
+ ldrb rc, [RTAB, x##rc]; \
+ orr rnd, rnd, RT1, ror #24; \
+ and RT1, RMASK, rd, lsr#8; \
+ orr rna, rna, RT2, ror #16; \
+ ldrb RT0, [RTAB, xRT0]; \
+ and RT2, RMASK, rd, lsr#16; \
+ ldrb RT1, [RTAB, xRT1]; \
+ orr rnb, rnb, rc, ror #8; \
+ ldrb RT2, [RTAB, xRT2]; \
+ lsr rd, rd, #24; \
+ ldrb rd, [RTAB, x##rd]; \
+ \
+ orr rnd, rnd, RT0; \
+ orr rna, rna, RT1, ror #24; \
+ orr rnb, rnb, RT2, ror #16; \
+ orr rnc, rnc, rd, ror #8;
+
+#define firstdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ addroundkey_dec(((round) + 1), ra, rb, rc, rd, rna, rnb, rnc, rnd); \
+ do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key);
+
+#define decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+ do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key);
+
+#define set_last_round_rmask(_, __) \
+ mov RMASK, #0xff;
+
+#define lastdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ add RTAB, RTAB, #(4 * 256); \
+ do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \
+ addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy);
+
+.globl _gcry_aes_arm_decrypt_block
+ELF(.type _gcry_aes_arm_decrypt_block,%function;)
+
+_gcry_aes_arm_decrypt_block:
+ /* input:
+ * %x0: keysched, CTX
+ * %x1: dst
+ * %x2: src
+ * %w3: number of rounds.. 10, 12 or 14
+ * %x4: decryption table
+ */
+ CFI_STARTPROC();
+
+ /* read input block */
+
+ /* aligned load */
+ ldp RA, RB, [RSRC];
+ ldp RC, RD, [RSRC, #8];
+#ifndef __AARCH64EL__
+ rev RA, RA;
+ rev RB, RB;
+ rev RC, RC;
+ rev RD, RD;
+#endif
+
+ mov RMASK, #(0xff << 2);
+
+ cmp NROUNDS, #12;
+ bge .Ldec_256;
+
+ firstdecround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+.Ldec_tail:
+ decround(8, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(7, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ decround(6, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(5, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ decround(4, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(3, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ decround(2, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(1, RA, RB, RC, RD, RNA, RNB, RNC, RND, set_last_round_rmask);
+ lastdecround(0, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+ /* store output block */
+
+ /* aligned store */
+#ifndef __AARCH64EL__
+ rev RA, RA;
+ rev RB, RB;
+ rev RC, RC;
+ rev RD, RD;
+#endif
+ /* write output block */
+ stp RA, RB, [RDST];
+ stp RC, RD, [RDST, #8];
+
+ mov x0, #(0);
+ ret;
+
+.ltorg
+.Ldec_256:
+ beq .Ldec_192;
+
+ firstdecround(13, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+ decround(12, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+
+ b .Ldec_tail;
+
+.ltorg
+.Ldec_192:
+ firstdecround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+ decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+
+ b .Ldec_tail;
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block;)
+
+#endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/
+#endif /*__AARCH64EL__ */
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-aesni.c b/comm/third_party/libgcrypt/cipher/rijndael-aesni.c
new file mode 100644
index 0000000000..95ec4c2bb7
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-aesni.c
@@ -0,0 +1,3965 @@
+/* AES-NI accelerated AES for Libgcrypt
+ * Copyright (C) 2000, 2001, 2002, 2003, 2007,
+ * 2008, 2011, 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for memcmp() */
+
+#include "types.h" /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "rijndael-internal.h"
+#include "./cipher-internal.h"
+
+
+#ifdef USE_AESNI
+
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+# pragma GCC target("no-sse")
+#endif
+#if __clang__
+# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function)
+#endif
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+typedef struct u128_s
+{
+ u32 a, b, c, d;
+} __attribute__((packed, aligned(1), may_alias)) u128_t;
+
+
+/* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l
+ because of 'pragma target'. */
+static ASM_FUNC_ATTR_INLINE const unsigned char *
+aes_ocb_get_l (gcry_cipher_hd_t c, u64 n)
+{
+ unsigned long ntz;
+
+ /* Assumes that N != 0. */
+ asm ("rep;bsfl %k[low], %k[ntz]\n\t"
+ : [ntz] "=r" (ntz)
+ : [low] "r" ((unsigned long)n)
+ : "cc");
+
+ return c->u_mode.ocb.L[ntz];
+}
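+
+/* For example, block number n = 12 (binary 1100) has two trailing zero
+   bits, so the snippet above returns c->u_mode.ocb.L[2], as required by
+   the OCB offset schedule. */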
+
+
+/* Two macros to be called before and after the use of AESNI
+   instructions.  There should be no external function calls between
+   the use of these macros.  Their purpose is to make sure that the
+   SSE registers are cleared and won't reveal any information about
+ the key or the data. */
+#ifdef __WIN64__
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define aesni_prepare_2_7_variable char win64tmp[16 * 2]
+# define aesni_prepare_8_15_variable char win64tmp8_15[16 * 8]
+# define aesni_prepare() do { } while (0)
+# define aesni_prepare_2_7() \
+ do { asm volatile ("movdqu %%xmm6, %0\n\t" \
+ "movdqu %%xmm7, %1\n\t" \
+ : "=m" (*win64tmp), "=m" (*(win64tmp+16)) \
+ : \
+ : "memory"); \
+ } while (0)
+# define aesni_prepare_8_15() \
+ do { asm volatile ("movdqu %%xmm8, 0*16(%0)\n\t" \
+ "movdqu %%xmm9, 1*16(%0)\n\t" \
+ "movdqu %%xmm10, 2*16(%0)\n\t" \
+ "movdqu %%xmm11, 3*16(%0)\n\t" \
+ "movdqu %%xmm12, 4*16(%0)\n\t" \
+ "movdqu %%xmm13, 5*16(%0)\n\t" \
+ "movdqu %%xmm14, 6*16(%0)\n\t" \
+ "movdqu %%xmm15, 7*16(%0)\n\t" \
+ : \
+ : "r" (win64tmp8_15) \
+ : "memory"); \
+ } while (0)
+# define aesni_cleanup() \
+ do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \
+ "pxor %%xmm1, %%xmm1\n" :: ); \
+ } while (0)
+# define aesni_cleanup_2_7() \
+ do { asm volatile ("movdqu %0, %%xmm6\n\t" \
+ "movdqu %1, %%xmm7\n\t" \
+ "pxor %%xmm2, %%xmm2\n" \
+ "pxor %%xmm3, %%xmm3\n" \
+ "pxor %%xmm4, %%xmm4\n" \
+ "pxor %%xmm5, %%xmm5\n" \
+ : \
+ : "m" (*win64tmp), "m" (*(win64tmp+16)) \
+ : "memory"); \
+ } while (0)
+# define aesni_cleanup_8_15() \
+ do { asm volatile ("movdqu 0*16(%0), %%xmm8\n\t" \
+ "movdqu 1*16(%0), %%xmm9\n\t" \
+ "movdqu 2*16(%0), %%xmm10\n\t" \
+ "movdqu 3*16(%0), %%xmm11\n\t" \
+ "movdqu 4*16(%0), %%xmm12\n\t" \
+ "movdqu 5*16(%0), %%xmm13\n\t" \
+ "movdqu 6*16(%0), %%xmm14\n\t" \
+ "movdqu 7*16(%0), %%xmm15\n\t" \
+ : \
+ : "r" (win64tmp8_15) \
+ : "memory"); \
+ } while (0)
+#else
+# define aesni_prepare_2_7_variable
+# define aesni_prepare() do { } while (0)
+# define aesni_prepare_2_7() do { } while (0)
+# define aesni_cleanup() \
+ do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \
+ "pxor %%xmm1, %%xmm1\n" :: ); \
+ } while (0)
+# define aesni_cleanup_2_7() \
+ do { asm volatile ("pxor %%xmm7, %%xmm7\n\t" \
+ "pxor %%xmm2, %%xmm2\n\t" \
+ "pxor %%xmm3, %%xmm3\n" \
+ "pxor %%xmm4, %%xmm4\n" \
+ "pxor %%xmm5, %%xmm5\n" \
+ "pxor %%xmm6, %%xmm6\n":: ); \
+ } while (0)
+# ifdef __x86_64__
+# define aesni_prepare_8_15_variable
+# define aesni_prepare_8_15() do { } while (0)
+# define aesni_cleanup_8_15() \
+ do { asm volatile ("pxor %%xmm8, %%xmm8\n" \
+ "pxor %%xmm9, %%xmm9\n" \
+ "pxor %%xmm10, %%xmm10\n" \
+ "pxor %%xmm11, %%xmm11\n" \
+ "pxor %%xmm12, %%xmm12\n" \
+ "pxor %%xmm13, %%xmm13\n" \
+ "pxor %%xmm14, %%xmm14\n" \
+ "pxor %%xmm15, %%xmm15\n":: ); \
+ } while (0)
+# endif
+#endif
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key)
+{
+ aesni_prepare_2_7_variable;
+
+ aesni_prepare();
+ aesni_prepare_2_7();
+
+ if (ctx->rounds < 12)
+ {
+ /* 128-bit key */
+#define AESKEYGENASSIST_xmm1_xmm2(imm8) \
+ ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd1, " #imm8 " \n\t"
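+/* The .byte sequence 0x66,0x0f,0x3a,0xdf,0xd1 encodes the instruction
+   "aeskeygenassist $imm8, %xmm1, %xmm2"; emitting raw bytes presumably
+   keeps the file building with assemblers that lack the AES-NI
+   mnemonics. */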
+#define AESKEY_EXPAND128 \
+ "pshufd $0xff, %%xmm2, %%xmm2\n\t" \
+ "movdqa %%xmm1, %%xmm3\n\t" \
+ "pslldq $4, %%xmm3\n\t" \
+ "pxor %%xmm3, %%xmm1\n\t" \
+ "pslldq $4, %%xmm3\n\t" \
+ "pxor %%xmm3, %%xmm1\n\t" \
+ "pslldq $4, %%xmm3\n\t" \
+ "pxor %%xmm3, %%xmm2\n\t" \
+ "pxor %%xmm2, %%xmm1\n\t"
+
+ asm volatile ("movdqu (%[key]), %%xmm1\n\t" /* xmm1 := key */
+ "movdqa %%xmm1, (%[ksch])\n\t" /* ksch[0] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x01)
+ AESKEY_EXPAND128
+ "movdqa %%xmm1, 0x10(%[ksch])\n\t" /* ksch[1] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x02)
+ AESKEY_EXPAND128
+ "movdqa %%xmm1, 0x20(%[ksch])\n\t" /* ksch[2] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x04)
+ AESKEY_EXPAND128
+ "movdqa %%xmm1, 0x30(%[ksch])\n\t" /* ksch[3] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x08)
+ AESKEY_EXPAND128
+ "movdqa %%xmm1, 0x40(%[ksch])\n\t" /* ksch[4] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x10)
+ AESKEY_EXPAND128
+ "movdqa %%xmm1, 0x50(%[ksch])\n\t" /* ksch[5] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x20)
+ AESKEY_EXPAND128
+ "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x40)
+ AESKEY_EXPAND128
+ "movdqa %%xmm1, 0x70(%[ksch])\n\t" /* ksch[7] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x80)
+ AESKEY_EXPAND128
+ "movdqa %%xmm1, 0x80(%[ksch])\n\t" /* ksch[8] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x1b)
+ AESKEY_EXPAND128
+ "movdqa %%xmm1, 0x90(%[ksch])\n\t" /* ksch[9] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x36)
+ AESKEY_EXPAND128
+ "movdqa %%xmm1, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm1 */
+ :
+ : [key] "r" (key), [ksch] "r" (ctx->keyschenc)
+ : "cc", "memory" );
+#undef AESKEYGENASSIST_xmm1_xmm2
+#undef AESKEY_EXPAND128
+ }
+ else if (ctx->rounds == 12)
+ {
+ /* 192-bit key */
+#define AESKEYGENASSIST_xmm3_xmm2(imm8) \
+ ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd3, " #imm8 " \n\t"
+#define AESKEY_EXPAND192 \
+ "pshufd $0x55, %%xmm2, %%xmm2\n\t" \
+ "movdqu %%xmm1, %%xmm4\n\t" \
+ "pslldq $4, %%xmm4\n\t" \
+ "pxor %%xmm4, %%xmm1\n\t" \
+ "pslldq $4, %%xmm4\n\t" \
+ "pxor %%xmm4, %%xmm1\n\t" \
+ "pslldq $4, %%xmm4\n\t" \
+ "pxor %%xmm4, %%xmm1\n\t" \
+ "pxor %%xmm2, %%xmm1\n\t" \
+ "pshufd $0xff, %%xmm1, %%xmm2\n\t" \
+ "movdqu %%xmm3, %%xmm4\n\t" \
+ "pslldq $4, %%xmm4\n\t" \
+ "pxor %%xmm4, %%xmm3\n\t" \
+ "pxor %%xmm2, %%xmm3\n\t"
+
+ asm volatile ("movdqu (%[key]), %%xmm1\n\t" /* xmm1 := key[0..15] */
+ "movq 16(%[key]), %%xmm3\n\t" /* xmm3 := key[16..23] */
+ "movdqa %%xmm1, (%[ksch])\n\t" /* ksch[0] := xmm1 */
+ "movdqa %%xmm3, %%xmm5\n\t"
+
+ AESKEYGENASSIST_xmm3_xmm2(0x01)
+ AESKEY_EXPAND192
+ "shufpd $0, %%xmm1, %%xmm5\n\t"
+ "movdqa %%xmm5, 0x10(%[ksch])\n\t" /* ksch[1] := xmm5 */
+ "movdqa %%xmm1, %%xmm6\n\t"
+ "shufpd $1, %%xmm3, %%xmm6\n\t"
+ "movdqa %%xmm6, 0x20(%[ksch])\n\t" /* ksch[2] := xmm6 */
+ AESKEYGENASSIST_xmm3_xmm2(0x02)
+ AESKEY_EXPAND192
+ "movdqa %%xmm1, 0x30(%[ksch])\n\t" /* ksch[3] := xmm1 */
+ "movdqa %%xmm3, %%xmm5\n\t"
+
+ AESKEYGENASSIST_xmm3_xmm2(0x04)
+ AESKEY_EXPAND192
+ "shufpd $0, %%xmm1, %%xmm5\n\t"
+ "movdqa %%xmm5, 0x40(%[ksch])\n\t" /* ksch[4] := xmm5 */
+ "movdqa %%xmm1, %%xmm6\n\t"
+ "shufpd $1, %%xmm3, %%xmm6\n\t"
+ "movdqa %%xmm6, 0x50(%[ksch])\n\t" /* ksch[5] := xmm6 */
+ AESKEYGENASSIST_xmm3_xmm2(0x08)
+ AESKEY_EXPAND192
+ "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1 */
+ "movdqa %%xmm3, %%xmm5\n\t"
+
+ AESKEYGENASSIST_xmm3_xmm2(0x10)
+ AESKEY_EXPAND192
+ "shufpd $0, %%xmm1, %%xmm5\n\t"
+ "movdqa %%xmm5, 0x70(%[ksch])\n\t" /* ksch[7] := xmm5 */
+ "movdqa %%xmm1, %%xmm6\n\t"
+ "shufpd $1, %%xmm3, %%xmm6\n\t"
+ "movdqa %%xmm6, 0x80(%[ksch])\n\t" /* ksch[8] := xmm6 */
+ AESKEYGENASSIST_xmm3_xmm2(0x20)
+ AESKEY_EXPAND192
+ "movdqa %%xmm1, 0x90(%[ksch])\n\t" /* ksch[9] := xmm1 */
+ "movdqa %%xmm3, %%xmm5\n\t"
+
+ AESKEYGENASSIST_xmm3_xmm2(0x40)
+ AESKEY_EXPAND192
+ "shufpd $0, %%xmm1, %%xmm5\n\t"
+ "movdqa %%xmm5, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm5 */
+ "movdqa %%xmm1, %%xmm6\n\t"
+ "shufpd $1, %%xmm3, %%xmm6\n\t"
+ "movdqa %%xmm6, 0xb0(%[ksch])\n\t" /* ksch[11] := xmm6 */
+ AESKEYGENASSIST_xmm3_xmm2(0x80)
+ AESKEY_EXPAND192
+ "movdqa %%xmm1, 0xc0(%[ksch])\n\t" /* ksch[12] := xmm1 */
+ :
+ : [key] "r" (key), [ksch] "r" (ctx->keyschenc)
+ : "cc", "memory" );
+#undef AESKEYGENASSIST_xmm3_xmm2
+#undef AESKEY_EXPAND192
+ }
+ else if (ctx->rounds > 12)
+ {
+ /* 256-bit key */
+#define AESKEYGENASSIST_xmm1_xmm2(imm8) \
+ ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd1, " #imm8 " \n\t"
+#define AESKEYGENASSIST_xmm3_xmm2(imm8) \
+ ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd3, " #imm8 " \n\t"
+#define AESKEY_EXPAND256_A \
+ "pshufd $0xff, %%xmm2, %%xmm2\n\t" \
+ "movdqa %%xmm1, %%xmm4\n\t" \
+ "pslldq $4, %%xmm4\n\t" \
+ "pxor %%xmm4, %%xmm1\n\t" \
+ "pslldq $4, %%xmm4\n\t" \
+ "pxor %%xmm4, %%xmm1\n\t" \
+ "pslldq $4, %%xmm4\n\t" \
+ "pxor %%xmm4, %%xmm1\n\t" \
+ "pxor %%xmm2, %%xmm1\n\t"
+#define AESKEY_EXPAND256_B \
+ "pshufd $0xaa, %%xmm2, %%xmm2\n\t" \
+ "movdqa %%xmm3, %%xmm4\n\t" \
+ "pslldq $4, %%xmm4\n\t" \
+ "pxor %%xmm4, %%xmm3\n\t" \
+ "pslldq $4, %%xmm4\n\t" \
+ "pxor %%xmm4, %%xmm3\n\t" \
+ "pslldq $4, %%xmm4\n\t" \
+ "pxor %%xmm4, %%xmm3\n\t" \
+ "pxor %%xmm2, %%xmm3\n\t"
+
+ asm volatile ("movdqu (%[key]), %%xmm1\n\t" /* xmm1 := key[0..15] */
+ "movdqu 16(%[key]), %%xmm3\n\t" /* xmm3 := key[16..31] */
+ "movdqa %%xmm1, (%[ksch])\n\t" /* ksch[0] := xmm1 */
+ "movdqa %%xmm3, 0x10(%[ksch])\n\t" /* ksch[1] := xmm3 */
+
+ AESKEYGENASSIST_xmm3_xmm2(0x01)
+ AESKEY_EXPAND256_A
+ "movdqa %%xmm1, 0x20(%[ksch])\n\t" /* ksch[2] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x00)
+ AESKEY_EXPAND256_B
+ "movdqa %%xmm3, 0x30(%[ksch])\n\t" /* ksch[3] := xmm3 */
+
+ AESKEYGENASSIST_xmm3_xmm2(0x02)
+ AESKEY_EXPAND256_A
+ "movdqa %%xmm1, 0x40(%[ksch])\n\t" /* ksch[4] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x00)
+ AESKEY_EXPAND256_B
+ "movdqa %%xmm3, 0x50(%[ksch])\n\t" /* ksch[5] := xmm3 */
+
+ AESKEYGENASSIST_xmm3_xmm2(0x04)
+ AESKEY_EXPAND256_A
+ "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x00)
+ AESKEY_EXPAND256_B
+ "movdqa %%xmm3, 0x70(%[ksch])\n\t" /* ksch[7] := xmm3 */
+
+ AESKEYGENASSIST_xmm3_xmm2(0x08)
+ AESKEY_EXPAND256_A
+ "movdqa %%xmm1, 0x80(%[ksch])\n\t" /* ksch[8] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x00)
+ AESKEY_EXPAND256_B
+ "movdqa %%xmm3, 0x90(%[ksch])\n\t" /* ksch[9] := xmm3 */
+
+ AESKEYGENASSIST_xmm3_xmm2(0x10)
+ AESKEY_EXPAND256_A
+ "movdqa %%xmm1, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x00)
+ AESKEY_EXPAND256_B
+ "movdqa %%xmm3, 0xb0(%[ksch])\n\t" /* ksch[11] := xmm3 */
+
+ AESKEYGENASSIST_xmm3_xmm2(0x20)
+ AESKEY_EXPAND256_A
+ "movdqa %%xmm1, 0xc0(%[ksch])\n\t" /* ksch[12] := xmm1 */
+ AESKEYGENASSIST_xmm1_xmm2(0x00)
+ AESKEY_EXPAND256_B
+ "movdqa %%xmm3, 0xd0(%[ksch])\n\t" /* ksch[13] := xmm3 */
+
+ AESKEYGENASSIST_xmm3_xmm2(0x40)
+ AESKEY_EXPAND256_A
+ "movdqa %%xmm1, 0xe0(%[ksch])\n\t" /* ksch[14] := xmm1 */
+
+ :
+ : [key] "r" (key), [ksch] "r" (ctx->keyschenc)
+ : "cc", "memory" );
+#undef AESKEYGENASSIST_xmm1_xmm2
+#undef AESKEYGENASSIST_xmm3_xmm2
+#undef AESKEY_EXPAND256_A
+#undef AESKEY_EXPAND256_B
+ }
+
+ aesni_cleanup();
+ aesni_cleanup_2_7();
+}
+
+
+/* Make a decryption key from an encryption key. */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_prepare_decryption (RIJNDAEL_context *ctx)
+{
+ /* The AES-NI decrypt instructions use the Equivalent Inverse
+     Cipher, thus we can't use the standard decrypt key
+ preparation. */
+ u128_t *ekey = (u128_t *)ctx->keyschenc;
+ u128_t *dkey = (u128_t *)ctx->keyschdec;
+ int rr;
+ int r;
+
+#define DO_AESNI_AESIMC() \
+ asm volatile ("movdqa %[ekey], %%xmm1\n\t" \
+ /*"aesimc %%xmm1, %%xmm1\n\t"*/ \
+ ".byte 0x66, 0x0f, 0x38, 0xdb, 0xc9\n\t" \
+ "movdqa %%xmm1, %[dkey]" \
+ : [dkey] "=m" (dkey[r]) \
+ : [ekey] "m" (ekey[rr]) \
+ : "memory")
+
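+  /* The Equivalent Inverse Cipher uses the encryption round keys in
+     reverse order, with every key except the first and the last passed
+     through aesimc (InvMixColumns); the unrolled sequence below builds
+     that schedule. */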
+ dkey[0] = ekey[ctx->rounds];
+ r=1;
+ rr=ctx->rounds-1;
+ DO_AESNI_AESIMC(); r++; rr--; /* round 1 */
+ DO_AESNI_AESIMC(); r++; rr--; /* round 2 */
+ DO_AESNI_AESIMC(); r++; rr--; /* round 3 */
+ DO_AESNI_AESIMC(); r++; rr--; /* round 4 */
+ DO_AESNI_AESIMC(); r++; rr--; /* round 5 */
+ DO_AESNI_AESIMC(); r++; rr--; /* round 6 */
+ DO_AESNI_AESIMC(); r++; rr--; /* round 7 */
+ DO_AESNI_AESIMC(); r++; rr--; /* round 8 */
+ DO_AESNI_AESIMC(); r++; rr--; /* round 9 */
+ if (ctx->rounds > 10)
+ {
+ DO_AESNI_AESIMC(); r++; rr--; /* round 10 */
+ DO_AESNI_AESIMC(); r++; rr--; /* round 11 */
+ if (ctx->rounds > 12)
+ {
+ DO_AESNI_AESIMC(); r++; rr--; /* round 12 */
+ DO_AESNI_AESIMC(); r++; rr--; /* round 13 */
+ }
+ }
+
+ dkey[r] = ekey[0];
+
+#undef DO_AESNI_AESIMC
+}
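+
+/* For illustration, the unrolled sequence above is equivalent to this
+   intrinsics sketch (not part of the build):
+
+     dkey[0] = ekey[rounds];
+     for (r = 1; r < rounds; r++)
+       dkey[r] = _mm_aesimc_si128 (ekey[rounds - r]);
+     dkey[rounds] = ekey[0];
+ */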
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx)
+{
+ aesni_prepare();
+ do_aesni_prepare_decryption (ctx);
+ aesni_cleanup();
+}
+
+
+/* Encrypt one block using the Intel AES-NI instructions. Block is input
+ * and output through SSE register xmm0. */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_enc (const RIJNDAEL_context *ctx)
+{
+#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
+#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
+ asm volatile ("movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */
+ "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */
+ "movdqa 0x10(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x20(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x30(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x40(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x50(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x60(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x70(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x80(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x90(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xa0(%[key]), %%xmm1\n\t"
+ "cmpl $10, %[rounds]\n\t"
+ "jz .Lenclast%=\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xb0(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xc0(%[key]), %%xmm1\n\t"
+ "cmpl $12, %[rounds]\n\t"
+ "jz .Lenclast%=\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xd0(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xe0(%[key]), %%xmm1\n"
+
+ ".Lenclast%=:\n\t"
+ aesenclast_xmm1_xmm0
+ "\n"
+ :
+ : [key] "r" (ctx->keyschenc),
+ [rounds] "r" (ctx->rounds)
+ : "cc", "memory");
+#undef aesenc_xmm1_xmm0
+#undef aesenclast_xmm1_xmm0
+}
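+
+/* For illustration, the unrolled rounds above are equivalent to this
+   intrinsics sketch (not part of the build); the cmpl/jz pairs simply
+   skip the round keys that 128/192-bit keys do not use:
+
+     b = _mm_xor_si128 (block, rk[0]);          initial AddRoundKey
+     for (r = 1; r < rounds; r++)
+       b = _mm_aesenc_si128 (b, rk[r]);         full rounds
+     b = _mm_aesenclast_si128 (b, rk[rounds]);  final round
+ */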
+
+
+/* Decrypt one block using the Intel AES-NI instructions. Block is input
+ * and output through SSE register xmm0. */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_dec (const RIJNDAEL_context *ctx)
+{
+#define aesdec_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t"
+#define aesdeclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc1\n\t"
+ asm volatile ("movdqa (%[key]), %%xmm1\n\t"
+ "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */
+ "movdqa 0x10(%[key]), %%xmm1\n\t"
+ aesdec_xmm1_xmm0
+ "movdqa 0x20(%[key]), %%xmm1\n\t"
+ aesdec_xmm1_xmm0
+ "movdqa 0x30(%[key]), %%xmm1\n\t"
+ aesdec_xmm1_xmm0
+ "movdqa 0x40(%[key]), %%xmm1\n\t"
+ aesdec_xmm1_xmm0
+ "movdqa 0x50(%[key]), %%xmm1\n\t"
+ aesdec_xmm1_xmm0
+ "movdqa 0x60(%[key]), %%xmm1\n\t"
+ aesdec_xmm1_xmm0
+ "movdqa 0x70(%[key]), %%xmm1\n\t"
+ aesdec_xmm1_xmm0
+ "movdqa 0x80(%[key]), %%xmm1\n\t"
+ aesdec_xmm1_xmm0
+ "movdqa 0x90(%[key]), %%xmm1\n\t"
+ aesdec_xmm1_xmm0
+ "movdqa 0xa0(%[key]), %%xmm1\n\t"
+ "cmpl $10, %[rounds]\n\t"
+ "jz .Ldeclast%=\n\t"
+ aesdec_xmm1_xmm0
+ "movdqa 0xb0(%[key]), %%xmm1\n\t"
+ aesdec_xmm1_xmm0
+ "movdqa 0xc0(%[key]), %%xmm1\n\t"
+ "cmpl $12, %[rounds]\n\t"
+ "jz .Ldeclast%=\n\t"
+ aesdec_xmm1_xmm0
+ "movdqa 0xd0(%[key]), %%xmm1\n\t"
+ aesdec_xmm1_xmm0
+ "movdqa 0xe0(%[key]), %%xmm1\n"
+
+ ".Ldeclast%=:\n\t"
+ aesdeclast_xmm1_xmm0
+ "\n"
+ :
+ : [key] "r" (ctx->keyschdec),
+ [rounds] "r" (ctx->rounds)
+ : "cc", "memory");
+#undef aesdec_xmm1_xmm0
+#undef aesdeclast_xmm1_xmm0
+}
+
+
+/* Encrypt four blocks using the Intel AES-NI instructions. Blocks are input
+ * and output through SSE registers xmm1 to xmm4. */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_enc_vec4 (const RIJNDAEL_context *ctx)
+{
+#define aesenc_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc8\n\t"
+#define aesenc_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd0\n\t"
+#define aesenc_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd8\n\t"
+#define aesenc_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe0\n\t"
+#define aesenclast_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc8\n\t"
+#define aesenclast_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd0\n\t"
+#define aesenclast_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd8\n\t"
+#define aesenclast_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe0\n\t"
+ asm volatile ("movdqa (%[key]), %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */
+ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */
+ "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */
+ "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */
+ "movdqa 0x10(%[key]), %%xmm0\n\t"
+ aesenc_xmm0_xmm1
+ aesenc_xmm0_xmm2
+ aesenc_xmm0_xmm3
+ aesenc_xmm0_xmm4
+ "movdqa 0x20(%[key]), %%xmm0\n\t"
+ aesenc_xmm0_xmm1
+ aesenc_xmm0_xmm2
+ aesenc_xmm0_xmm3
+ aesenc_xmm0_xmm4
+ "movdqa 0x30(%[key]), %%xmm0\n\t"
+ aesenc_xmm0_xmm1
+ aesenc_xmm0_xmm2
+ aesenc_xmm0_xmm3
+ aesenc_xmm0_xmm4
+ "movdqa 0x40(%[key]), %%xmm0\n\t"
+ aesenc_xmm0_xmm1
+ aesenc_xmm0_xmm2
+ aesenc_xmm0_xmm3
+ aesenc_xmm0_xmm4
+ "movdqa 0x50(%[key]), %%xmm0\n\t"
+ aesenc_xmm0_xmm1
+ aesenc_xmm0_xmm2
+ aesenc_xmm0_xmm3
+ aesenc_xmm0_xmm4
+ "movdqa 0x60(%[key]), %%xmm0\n\t"
+ aesenc_xmm0_xmm1
+ aesenc_xmm0_xmm2
+ aesenc_xmm0_xmm3
+ aesenc_xmm0_xmm4
+ "movdqa 0x70(%[key]), %%xmm0\n\t"
+ aesenc_xmm0_xmm1
+ aesenc_xmm0_xmm2
+ aesenc_xmm0_xmm3
+ aesenc_xmm0_xmm4
+ "movdqa 0x80(%[key]), %%xmm0\n\t"
+ aesenc_xmm0_xmm1
+ aesenc_xmm0_xmm2
+ aesenc_xmm0_xmm3
+ aesenc_xmm0_xmm4
+ "movdqa 0x90(%[key]), %%xmm0\n\t"
+ aesenc_xmm0_xmm1
+ aesenc_xmm0_xmm2
+ aesenc_xmm0_xmm3
+ aesenc_xmm0_xmm4
+ "movdqa 0xa0(%[key]), %%xmm0\n\t"
+ "cmpl $10, %[rounds]\n\t"
+ "jz .Ldeclast%=\n\t"
+ aesenc_xmm0_xmm1
+ aesenc_xmm0_xmm2
+ aesenc_xmm0_xmm3
+ aesenc_xmm0_xmm4
+ "movdqa 0xb0(%[key]), %%xmm0\n\t"
+ aesenc_xmm0_xmm1
+ aesenc_xmm0_xmm2
+ aesenc_xmm0_xmm3
+ aesenc_xmm0_xmm4
+ "movdqa 0xc0(%[key]), %%xmm0\n\t"
+ "cmpl $12, %[rounds]\n\t"
+ "jz .Ldeclast%=\n\t"
+ aesenc_xmm0_xmm1
+ aesenc_xmm0_xmm2
+ aesenc_xmm0_xmm3
+ aesenc_xmm0_xmm4
+ "movdqa 0xd0(%[key]), %%xmm0\n\t"
+ aesenc_xmm0_xmm1
+ aesenc_xmm0_xmm2
+ aesenc_xmm0_xmm3
+ aesenc_xmm0_xmm4
+ "movdqa 0xe0(%[key]), %%xmm0\n"
+
+ ".Ldeclast%=:\n\t"
+ aesenclast_xmm0_xmm1
+ aesenclast_xmm0_xmm2
+ aesenclast_xmm0_xmm3
+ aesenclast_xmm0_xmm4
+ : /* no output */
+ : [key] "r" (ctx->keyschenc),
+ [rounds] "r" (ctx->rounds)
+ : "cc", "memory");
+#undef aesenc_xmm0_xmm1
+#undef aesenc_xmm0_xmm2
+#undef aesenc_xmm0_xmm3
+#undef aesenc_xmm0_xmm4
+#undef aesenclast_xmm0_xmm1
+#undef aesenclast_xmm0_xmm2
+#undef aesenclast_xmm0_xmm3
+#undef aesenclast_xmm0_xmm4
+}
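+
+/* The four blocks above are processed in lock-step so the latency of
+   one AESENC is hidden behind the other three; per round the pattern
+   is simply (illustrative sketch):
+
+     b1 = _mm_aesenc_si128 (b1, rk);
+     b2 = _mm_aesenc_si128 (b2, rk);
+     b3 = _mm_aesenc_si128 (b3, rk);
+     b4 = _mm_aesenc_si128 (b4, rk);
+ */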
+
+
+/* Decrypt four blocks using the Intel AES-NI instructions. Blocks are input
+ * and output through SSE registers xmm1 to xmm4. */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_dec_vec4 (const RIJNDAEL_context *ctx)
+{
+#define aesdec_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc8\n\t"
+#define aesdec_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xde, 0xd0\n\t"
+#define aesdec_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xde, 0xd8\n\t"
+#define aesdec_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xde, 0xe0\n\t"
+#define aesdeclast_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc8\n\t"
+#define aesdeclast_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xd0\n\t"
+#define aesdeclast_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xd8\n\t"
+#define aesdeclast_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xe0\n\t"
+ asm volatile ("movdqa (%[key]), %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */
+ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */
+ "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */
+ "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */
+ "movdqa 0x10(%[key]), %%xmm0\n\t"
+ aesdec_xmm0_xmm1
+ aesdec_xmm0_xmm2
+ aesdec_xmm0_xmm3
+ aesdec_xmm0_xmm4
+ "movdqa 0x20(%[key]), %%xmm0\n\t"
+ aesdec_xmm0_xmm1
+ aesdec_xmm0_xmm2
+ aesdec_xmm0_xmm3
+ aesdec_xmm0_xmm4
+ "movdqa 0x30(%[key]), %%xmm0\n\t"
+ aesdec_xmm0_xmm1
+ aesdec_xmm0_xmm2
+ aesdec_xmm0_xmm3
+ aesdec_xmm0_xmm4
+ "movdqa 0x40(%[key]), %%xmm0\n\t"
+ aesdec_xmm0_xmm1
+ aesdec_xmm0_xmm2
+ aesdec_xmm0_xmm3
+ aesdec_xmm0_xmm4
+ "movdqa 0x50(%[key]), %%xmm0\n\t"
+ aesdec_xmm0_xmm1
+ aesdec_xmm0_xmm2
+ aesdec_xmm0_xmm3
+ aesdec_xmm0_xmm4
+ "movdqa 0x60(%[key]), %%xmm0\n\t"
+ aesdec_xmm0_xmm1
+ aesdec_xmm0_xmm2
+ aesdec_xmm0_xmm3
+ aesdec_xmm0_xmm4
+ "movdqa 0x70(%[key]), %%xmm0\n\t"
+ aesdec_xmm0_xmm1
+ aesdec_xmm0_xmm2
+ aesdec_xmm0_xmm3
+ aesdec_xmm0_xmm4
+ "movdqa 0x80(%[key]), %%xmm0\n\t"
+ aesdec_xmm0_xmm1
+ aesdec_xmm0_xmm2
+ aesdec_xmm0_xmm3
+ aesdec_xmm0_xmm4
+ "movdqa 0x90(%[key]), %%xmm0\n\t"
+ aesdec_xmm0_xmm1
+ aesdec_xmm0_xmm2
+ aesdec_xmm0_xmm3
+ aesdec_xmm0_xmm4
+ "movdqa 0xa0(%[key]), %%xmm0\n\t"
+ "cmpl $10, %[rounds]\n\t"
+ "jz .Ldeclast%=\n\t"
+ aesdec_xmm0_xmm1
+ aesdec_xmm0_xmm2
+ aesdec_xmm0_xmm3
+ aesdec_xmm0_xmm4
+ "movdqa 0xb0(%[key]), %%xmm0\n\t"
+ aesdec_xmm0_xmm1
+ aesdec_xmm0_xmm2
+ aesdec_xmm0_xmm3
+ aesdec_xmm0_xmm4
+ "movdqa 0xc0(%[key]), %%xmm0\n\t"
+ "cmpl $12, %[rounds]\n\t"
+ "jz .Ldeclast%=\n\t"
+ aesdec_xmm0_xmm1
+ aesdec_xmm0_xmm2
+ aesdec_xmm0_xmm3
+ aesdec_xmm0_xmm4
+ "movdqa 0xd0(%[key]), %%xmm0\n\t"
+ aesdec_xmm0_xmm1
+ aesdec_xmm0_xmm2
+ aesdec_xmm0_xmm3
+ aesdec_xmm0_xmm4
+ "movdqa 0xe0(%[key]), %%xmm0\n"
+
+ ".Ldeclast%=:\n\t"
+ aesdeclast_xmm0_xmm1
+ aesdeclast_xmm0_xmm2
+ aesdeclast_xmm0_xmm3
+ aesdeclast_xmm0_xmm4
+ : /* no output */
+ : [key] "r" (ctx->keyschdec),
+ [rounds] "r" (ctx->rounds)
+ : "cc", "memory");
+#undef aesdec_xmm0_xmm1
+#undef aesdec_xmm0_xmm2
+#undef aesdec_xmm0_xmm3
+#undef aesdec_xmm0_xmm4
+#undef aesdeclast_xmm0_xmm1
+#undef aesdeclast_xmm0_xmm2
+#undef aesdeclast_xmm0_xmm3
+#undef aesdeclast_xmm0_xmm4
+}
+
+
+#ifdef __x86_64__
+
+/* Encrypt eight blocks using the Intel AES-NI instructions. Blocks are input
+ * and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11. */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_enc_vec8 (const RIJNDAEL_context *ctx)
+{
+ asm volatile ("movdqa 0x10(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x20(%[key]), %%xmm0\n\t"
+ "cmpl $12, %[rounds]\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x30(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x40(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x50(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x60(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x70(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x80(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x90(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0xa0(%[key]), %%xmm0\n\t"
+ "jb .Ldeclast%=\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0xb0(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0xc0(%[key]), %%xmm0\n\t"
+ "je .Ldeclast%=\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0xd0(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0xe0(%[key]), %%xmm0\n"
+
+ ".Ldeclast%=:\n\t"
+ : /* no output */
+ : [key] "r" (ctx->keyschenc),
+ [rounds] "r" (ctx->rounds)
+ : "cc", "memory");
+}
+
+
+/* Decrypt eight blocks using the Intel AES-NI instructions. Blocks are input
+ * and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11. */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_dec_vec8 (const RIJNDAEL_context *ctx)
+{
+ asm volatile ("movdqa 0x10(%[key]), %%xmm0\n\t"
+ "cmpl $12, %[rounds]\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x20(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x30(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x40(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x50(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x60(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x70(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x80(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x90(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0xa0(%[key]), %%xmm0\n\t"
+ "jb .Ldeclast%=\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0xb0(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0xc0(%[key]), %%xmm0\n\t"
+ "je .Ldeclast%=\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0xd0(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0xe0(%[key]), %%xmm0\n"
+
+ ".Ldeclast%=:\n\t"
+ : /* no output */
+ : [key] "r" (ctx->keyschdec),
+ [rounds] "r" (ctx->rounds)
+ : "cc", "memory");
+}
+
+#endif /* __x86_64__ */
+
+
+/* Perform a CTR encryption round using the counter CTR and the input
+ block A. Write the result to the output block B and update CTR.
+ CTR needs to be a 16-byte aligned big-endian value. */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_ctr (const RIJNDAEL_context *ctx,
+ unsigned char *ctr, unsigned char *b, const unsigned char *a)
+{
+#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
+#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
+
+ asm volatile ("movdqa %%xmm5, %%xmm0\n\t" /* xmm0 := CTR (xmm5) */
+ "pcmpeqd %%xmm1, %%xmm1\n\t"
+ "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */
+
+ "pshufb %%xmm6, %%xmm5\n\t"
+ "psubq %%xmm1, %%xmm5\n\t" /* xmm5++ (big endian) */
+
+ /* detect if 64-bit carry handling is needed */
+ "cmpl $0xffffffff, 8(%[ctr])\n\t"
+ "jne .Lno_carry%=\n\t"
+ "cmpl $0xffffffff, 12(%[ctr])\n\t"
+ "jne .Lno_carry%=\n\t"
+
+ "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */
+ "psubq %%xmm1, %%xmm5\n\t" /* add carry to upper 64bits */
+
+ ".Lno_carry%=:\n\t"
+
+ "pshufb %%xmm6, %%xmm5\n\t"
+ "movdqa %%xmm5, (%[ctr])\n\t" /* Update CTR (mem). */
+
+ "pxor (%[key]), %%xmm0\n\t" /* xmm1 ^= key[0] */
+ "movdqa 0x10(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x20(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x30(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x40(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x50(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x60(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x70(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x80(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x90(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xa0(%[key]), %%xmm1\n\t"
+ "cmpl $10, %[rounds]\n\t"
+ "jz .Lenclast%=\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xb0(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xc0(%[key]), %%xmm1\n\t"
+ "cmpl $12, %[rounds]\n\t"
+ "jz .Lenclast%=\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xd0(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xe0(%[key]), %%xmm1\n"
+
+ ".Lenclast%=:\n\t"
+ aesenclast_xmm1_xmm0
+ "movdqu %[src], %%xmm1\n\t" /* xmm1 := input */
+ "pxor %%xmm1, %%xmm0\n\t" /* EncCTR ^= input */
+ "movdqu %%xmm0, %[dst]" /* Store EncCTR. */
+
+ : [dst] "=m" (*b)
+ : [src] "m" (*a),
+ [ctr] "r" (ctr),
+ [key] "r" (ctx->keyschenc),
+ [rounds] "g" (ctx->rounds)
+ : "cc", "memory");
+#undef aesenc_xmm1_xmm0
+#undef aesenclast_xmm1_xmm0
+}
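+
+/* For illustration, one CTR step above amounts to (sketch only):
+
+     B = A ^ E_K(CTR);      encrypt the counter, XOR with the input
+     CTR = CTR + 1;         128-bit big-endian increment
+
+   The pshufb/psubq pair adds 1 in the low 64 bits; the rare carry out
+   of the low 64 bits is handled by the explicit compare/branch. */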
+
+
+/* Four blocks at a time variant of do_aesni_ctr. */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
+ unsigned char *ctr, unsigned char *b, const unsigned char *a)
+{
+ static const byte bige_addb_const[4][16] __attribute__ ((aligned (16))) =
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4 }
+ };
+ const void *bige_addb = bige_addb_const;
+#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
+#define aesenc_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t"
+#define aesenc_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t"
+#define aesenc_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe1\n\t"
+#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
+#define aesenclast_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd1\n\t"
+#define aesenclast_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd9\n\t"
+#define aesenclast_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe1\n\t"
+
+ /* Register usage:
+ [key] keyschedule
+ xmm0 CTR-0
+ xmm1 temp / round key
+ xmm2 CTR-1
+ xmm3 CTR-2
+ xmm4 CTR-3
+ xmm5 copy of *ctr
+ xmm6 endian swapping mask
+ */
+
+ asm volatile (/* detect if 8-bit carry handling is needed */
+ "addb $4, 15(%[ctr])\n\t"
+ "jc .Ladd32bit%=\n\t"
+
+ "movdqa %%xmm5, %%xmm0\n\t" /* xmm0 := CTR (xmm5) */
+ "movdqa 0*16(%[addb]), %%xmm2\n\t" /* xmm2 := be(1) */
+ "movdqa 1*16(%[addb]), %%xmm3\n\t" /* xmm3 := be(2) */
+ "movdqa 2*16(%[addb]), %%xmm4\n\t" /* xmm4 := be(3) */
+ "movdqa 3*16(%[addb]), %%xmm5\n\t" /* xmm5 := be(4) */
+ "paddb %%xmm0, %%xmm2\n\t" /* xmm2 := be(1) + CTR (xmm0) */
+ "paddb %%xmm0, %%xmm3\n\t" /* xmm3 := be(2) + CTR (xmm0) */
+ "paddb %%xmm0, %%xmm4\n\t" /* xmm4 := be(3) + CTR (xmm0) */
+ "paddb %%xmm0, %%xmm5\n\t" /* xmm5 := be(4) + CTR (xmm0) */
+ "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */
+ "jmp .Ldone_ctr%=\n\t"
+
+ ".Ladd32bit%=:\n\t"
+ "movdqa %%xmm5, (%[ctr])\n\t" /* Restore CTR. */
+ "movdqa %%xmm5, %%xmm0\n\t" /* xmm0, xmm2 := CTR (xmm5) */
+ "movdqa %%xmm0, %%xmm2\n\t"
+ "pcmpeqd %%xmm1, %%xmm1\n\t"
+ "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */
+
+ "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := le(xmm2) */
+ "psubq %%xmm1, %%xmm2\n\t" /* xmm2++ */
+ "movdqa %%xmm2, %%xmm3\n\t" /* xmm3 := xmm2 */
+ "psubq %%xmm1, %%xmm3\n\t" /* xmm3++ */
+ "movdqa %%xmm3, %%xmm4\n\t" /* xmm4 := xmm3 */
+ "psubq %%xmm1, %%xmm4\n\t" /* xmm4++ */
+ "movdqa %%xmm4, %%xmm5\n\t" /* xmm5 := xmm4 */
+ "psubq %%xmm1, %%xmm5\n\t" /* xmm5++ */
+
+ /* detect if 64-bit carry handling is needed */
+ "cmpl $0xffffffff, 8(%[ctr])\n\t"
+ "jne .Lno_carry%=\n\t"
+ "movl 12(%[ctr]), %%esi\n\t"
+ "bswapl %%esi\n\t"
+ "cmpl $0xfffffffc, %%esi\n\t"
+ "jb .Lno_carry%=\n\t" /* no carry */
+
+ "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */
+ "je .Lcarry_xmm5%=\n\t" /* esi == 0xfffffffc */
+ "cmpl $0xfffffffe, %%esi\n\t"
+ "jb .Lcarry_xmm4%=\n\t" /* esi == 0xfffffffd */
+ "je .Lcarry_xmm3%=\n\t" /* esi == 0xfffffffe */
+ /* esi == 0xffffffff */
+
+ "psubq %%xmm1, %%xmm2\n\t"
+ ".Lcarry_xmm3%=:\n\t"
+ "psubq %%xmm1, %%xmm3\n\t"
+ ".Lcarry_xmm4%=:\n\t"
+ "psubq %%xmm1, %%xmm4\n\t"
+ ".Lcarry_xmm5%=:\n\t"
+ "psubq %%xmm1, %%xmm5\n\t"
+
+ ".Lno_carry%=:\n\t"
+ "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */
+
+ "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := be(xmm2) */
+ "pshufb %%xmm6, %%xmm3\n\t" /* xmm3 := be(xmm3) */
+ "pshufb %%xmm6, %%xmm4\n\t" /* xmm4 := be(xmm4) */
+ "pshufb %%xmm6, %%xmm5\n\t" /* xmm5 := be(xmm5) */
+
+ "movdqa %%xmm5, (%[ctr])\n\t" /* Update CTR (mem). */
+
+ ".Ldone_ctr%=:\n\t"
+ :
+ : [ctr] "r" (ctr),
+ [key] "r" (ctx->keyschenc),
+ [addb] "r" (bige_addb)
+ : "%esi", "cc", "memory");
+
+ asm volatile ("pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */
+ "pxor %%xmm1, %%xmm2\n\t" /* xmm2 ^= key[0] */
+ "pxor %%xmm1, %%xmm3\n\t" /* xmm3 ^= key[0] */
+ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 ^= key[0] */
+ "movdqa 0x10(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ aesenc_xmm1_xmm2
+ aesenc_xmm1_xmm3
+ aesenc_xmm1_xmm4
+ "movdqa 0x20(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ aesenc_xmm1_xmm2
+ aesenc_xmm1_xmm3
+ aesenc_xmm1_xmm4
+ "movdqa 0x30(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ aesenc_xmm1_xmm2
+ aesenc_xmm1_xmm3
+ aesenc_xmm1_xmm4
+ "movdqa 0x40(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ aesenc_xmm1_xmm2
+ aesenc_xmm1_xmm3
+ aesenc_xmm1_xmm4
+ "movdqa 0x50(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ aesenc_xmm1_xmm2
+ aesenc_xmm1_xmm3
+ aesenc_xmm1_xmm4
+ "movdqa 0x60(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ aesenc_xmm1_xmm2
+ aesenc_xmm1_xmm3
+ aesenc_xmm1_xmm4
+ "movdqa 0x70(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ aesenc_xmm1_xmm2
+ aesenc_xmm1_xmm3
+ aesenc_xmm1_xmm4
+ "movdqa 0x80(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ aesenc_xmm1_xmm2
+ aesenc_xmm1_xmm3
+ aesenc_xmm1_xmm4
+ "movdqa 0x90(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ aesenc_xmm1_xmm2
+ aesenc_xmm1_xmm3
+ aesenc_xmm1_xmm4
+ "movdqa 0xa0(%[key]), %%xmm1\n\t"
+ "cmpl $10, %[rounds]\n\t"
+ "jz .Lenclast%=\n\t"
+ aesenc_xmm1_xmm0
+ aesenc_xmm1_xmm2
+ aesenc_xmm1_xmm3
+ aesenc_xmm1_xmm4
+ "movdqa 0xb0(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ aesenc_xmm1_xmm2
+ aesenc_xmm1_xmm3
+ aesenc_xmm1_xmm4
+ "movdqa 0xc0(%[key]), %%xmm1\n\t"
+ "cmpl $12, %[rounds]\n\t"
+ "jz .Lenclast%=\n\t"
+ aesenc_xmm1_xmm0
+ aesenc_xmm1_xmm2
+ aesenc_xmm1_xmm3
+ aesenc_xmm1_xmm4
+ "movdqa 0xd0(%[key]), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ aesenc_xmm1_xmm2
+ aesenc_xmm1_xmm3
+ aesenc_xmm1_xmm4
+ "movdqa 0xe0(%[key]), %%xmm1\n"
+
+ ".Lenclast%=:\n\t"
+ aesenclast_xmm1_xmm0
+ aesenclast_xmm1_xmm2
+ aesenclast_xmm1_xmm3
+ aesenclast_xmm1_xmm4
+ :
+ : [key] "r" (ctx->keyschenc),
+ [rounds] "r" (ctx->rounds)
+ : "cc", "memory");
+
+ asm volatile ("movdqu (%[src]), %%xmm1\n\t" /* Get block 1. */
+ "pxor %%xmm1, %%xmm0\n\t" /* EncCTR-1 ^= input */
+ "movdqu %%xmm0, (%[dst])\n\t" /* Store block 1 */
+
+ "movdqu 16(%[src]), %%xmm1\n\t" /* Get block 2. */
+ "pxor %%xmm1, %%xmm2\n\t" /* EncCTR-2 ^= input */
+ "movdqu %%xmm2, 16(%[dst])\n\t" /* Store block 2. */
+
+ "movdqu 32(%[src]), %%xmm1\n\t" /* Get block 3. */
+ "pxor %%xmm1, %%xmm3\n\t" /* EncCTR-3 ^= input */
+ "movdqu %%xmm3, 32(%[dst])\n\t" /* Store block 3. */
+
+ "movdqu 48(%[src]), %%xmm1\n\t" /* Get block 4. */
+ "pxor %%xmm1, %%xmm4\n\t" /* EncCTR-4 ^= input */
+ "movdqu %%xmm4, 48(%[dst])" /* Store block 4. */
+ :
+ : [src] "r" (a),
+ [dst] "r" (b)
+ : "memory");
+#undef aesenc_xmm1_xmm0
+#undef aesenc_xmm1_xmm2
+#undef aesenc_xmm1_xmm3
+#undef aesenc_xmm1_xmm4
+#undef aesenclast_xmm1_xmm0
+#undef aesenclast_xmm1_xmm2
+#undef aesenclast_xmm1_xmm3
+#undef aesenclast_xmm1_xmm4
+}
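+
+/* For illustration, the fast path above works because adding 4 to the
+   last counter byte without overflow means none of the four counters
+   needs a byte carry, so they can be formed with plain byte adds:
+
+     ctr1 = ctr + be(1);  ctr2 = ctr + be(2);
+     ctr3 = ctr + be(3);  ctr4 = ctr + be(4);   (paddb)
+
+   and the in-memory counter is already updated by the addb.  Otherwise
+   the code falls back to byte-swapping and full 64-bit increments with
+   explicit carry propagation into the high half. */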
+
+
+#ifdef __x86_64__
+
+/* Eight blocks at a time variant of do_aesni_ctr. */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_ctr_8 (const RIJNDAEL_context *ctx,
+ unsigned char *ctr, unsigned char *b, const unsigned char *a)
+{
+ static const byte bige_addb_const[8][16] __attribute__ ((aligned (16))) =
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }
+ };
+ const void *bige_addb = bige_addb_const;
+
+ /* Register usage:
+ [key] keyschedule
+ xmm0 CTR-0
+ xmm1 temp / round key
+ xmm2 CTR-1
+ xmm3 CTR-2
+ xmm4 CTR-3
+ xmm5 copy of *ctr
+ xmm6 endian swapping mask
+ xmm8 CTR-4
+ xmm9 CTR-5
+ xmm10 CTR-6
+ xmm11 CTR-7
+ xmm12 temp
+ xmm13 temp
+ xmm14 temp
+ xmm15 temp
+ */
+
+ asm volatile (/* detect if 8-bit carry handling is needed */
+ "addb $8, 15(%[ctr])\n\t"
+ "jc .Ladd32bit%=\n\t"
+
+ "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */
+ "movdqa 16(%[key]), %%xmm7\n\t" /* xmm7 := key[1] */
+
+ "movdqa %%xmm5, %%xmm0\n\t" /* xmm0 := CTR (xmm5) */
+ "movdqa %%xmm5, %%xmm2\n\t" /* xmm2 := CTR (xmm5) */
+ "movdqa %%xmm5, %%xmm3\n\t" /* xmm3 := CTR (xmm5) */
+ "movdqa %%xmm5, %%xmm4\n\t" /* xmm4 := CTR (xmm5) */
+ "paddb 0*16(%[addb]), %%xmm2\n\t" /* xmm2 := be(1) + CTR */
+ "paddb 1*16(%[addb]), %%xmm3\n\t" /* xmm3 := be(2) + CTR */
+ "paddb 2*16(%[addb]), %%xmm4\n\t" /* xmm4 := be(3) + CTR */
+ "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */
+ "pxor %%xmm1, %%xmm2\n\t" /* xmm2 ^= key[0] */
+ "pxor %%xmm1, %%xmm3\n\t" /* xmm3 ^= key[0] */
+ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 ^= key[0] */
+ "aesenc %%xmm7, %%xmm0\n\t"
+ "aesenc %%xmm7, %%xmm2\n\t"
+ "aesenc %%xmm7, %%xmm3\n\t"
+ "aesenc %%xmm7, %%xmm4\n\t"
+ "movdqa %%xmm5, %%xmm8\n\t" /* xmm8 := CTR (xmm5) */
+ "movdqa %%xmm5, %%xmm9\n\t" /* xmm9 := CTR (xmm5) */
+ "movdqa %%xmm5, %%xmm10\n\t" /* xmm10 := CTR (xmm5) */
+ "movdqa %%xmm5, %%xmm11\n\t" /* xmm11 := CTR (xmm5) */
+ "paddb 3*16(%[addb]), %%xmm8\n\t" /* xmm8 := be(4) + CTR */
+ "paddb 4*16(%[addb]), %%xmm9\n\t" /* xmm9 := be(5) + CTR */
+ "paddb 5*16(%[addb]), %%xmm10\n\t" /* xmm10 := be(6) + CTR */
+ "paddb 6*16(%[addb]), %%xmm11\n\t" /* xmm11 := be(7) + CTR */
+ "pxor %%xmm1, %%xmm8\n\t" /* xmm8 ^= key[0] */
+ "pxor %%xmm1, %%xmm9\n\t" /* xmm9 ^= key[0] */
+ "pxor %%xmm1, %%xmm10\n\t" /* xmm10 ^= key[0] */
+ "pxor %%xmm1, %%xmm11\n\t" /* xmm11 ^= key[0] */
+ "aesenc %%xmm7, %%xmm8\n\t"
+ "aesenc %%xmm7, %%xmm9\n\t"
+ "aesenc %%xmm7, %%xmm10\n\t"
+ "aesenc %%xmm7, %%xmm11\n\t"
+
+ "paddb 7*16(%[addb]), %%xmm5\n\t" /* xmm5 := be(8) + CTR */
+
+ "jmp .Ldone_ctr%=\n\t"
+
+ ".Ladd32bit%=:\n\t"
+ "movdqa %%xmm5, (%[ctr])\n\t" /* Restore CTR. */
+ "movdqa %%xmm5, %%xmm0\n\t" /* xmm0, xmm2 := CTR (xmm5) */
+ "movdqa %%xmm0, %%xmm2\n\t"
+ "pcmpeqd %%xmm1, %%xmm1\n\t"
+ "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */
+
+ "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := le(xmm2) */
+ "psubq %%xmm1, %%xmm2\n\t" /* xmm2++ */
+ "movdqa %%xmm2, %%xmm3\n\t" /* xmm3 := xmm2 */
+ "psubq %%xmm1, %%xmm3\n\t" /* xmm3++ */
+ "movdqa %%xmm3, %%xmm4\n\t" /* xmm4 := xmm3 */
+ "psubq %%xmm1, %%xmm4\n\t" /* xmm4++ */
+ "movdqa %%xmm4, %%xmm8\n\t" /* xmm8 := xmm4 */
+ "psubq %%xmm1, %%xmm8\n\t" /* xmm8++ */
+ "movdqa %%xmm8, %%xmm9\n\t" /* xmm9 := xmm8 */
+ "psubq %%xmm1, %%xmm9\n\t" /* xmm9++ */
+ "movdqa %%xmm9, %%xmm10\n\t" /* xmm10 := xmm9 */
+ "psubq %%xmm1, %%xmm10\n\t" /* xmm10++ */
+ "movdqa %%xmm10, %%xmm11\n\t" /* xmm11 := xmm10 */
+ "psubq %%xmm1, %%xmm11\n\t" /* xmm11++ */
+ "movdqa %%xmm11, %%xmm5\n\t" /* xmm5 := xmm11 */
+ "psubq %%xmm1, %%xmm5\n\t" /* xmm5++ */
+
+ /* detect if 64-bit carry handling is needed */
+ "cmpl $0xffffffff, 8(%[ctr])\n\t"
+ "jne .Lno_carry%=\n\t"
+ "movl 12(%[ctr]), %%esi\n\t"
+ "bswapl %%esi\n\t"
+ "cmpl $0xfffffff8, %%esi\n\t"
+ "jb .Lno_carry%=\n\t" /* no carry */
+
+ "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */
+ "je .Lcarry_xmm5%=\n\t" /* esi == 0xfffffff8 */
+ "cmpl $0xfffffffa, %%esi\n\t"
+ "jb .Lcarry_xmm11%=\n\t" /* esi == 0xfffffff9 */
+ "je .Lcarry_xmm10%=\n\t" /* esi == 0xfffffffa */
+ "cmpl $0xfffffffc, %%esi\n\t"
+ "jb .Lcarry_xmm9%=\n\t" /* esi == 0xfffffffb */
+ "je .Lcarry_xmm8%=\n\t" /* esi == 0xfffffffc */
+ "cmpl $0xfffffffe, %%esi\n\t"
+ "jb .Lcarry_xmm4%=\n\t" /* esi == 0xfffffffd */
+ "je .Lcarry_xmm3%=\n\t" /* esi == 0xfffffffe */
+ /* esi == 0xffffffff */
+
+ "psubq %%xmm1, %%xmm2\n\t"
+ ".Lcarry_xmm3%=:\n\t"
+ "psubq %%xmm1, %%xmm3\n\t"
+ ".Lcarry_xmm4%=:\n\t"
+ "psubq %%xmm1, %%xmm4\n\t"
+ ".Lcarry_xmm8%=:\n\t"
+ "psubq %%xmm1, %%xmm8\n\t"
+ ".Lcarry_xmm9%=:\n\t"
+ "psubq %%xmm1, %%xmm9\n\t"
+ ".Lcarry_xmm10%=:\n\t"
+ "psubq %%xmm1, %%xmm10\n\t"
+ ".Lcarry_xmm11%=:\n\t"
+ "psubq %%xmm1, %%xmm11\n\t"
+ ".Lcarry_xmm5%=:\n\t"
+ "psubq %%xmm1, %%xmm5\n\t"
+
+ ".Lno_carry%=:\n\t"
+ "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */
+ "movdqa 16(%[key]), %%xmm7\n\t" /* xmm7 := key[1] */
+
+ "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := be(xmm2) */
+ "pshufb %%xmm6, %%xmm3\n\t" /* xmm3 := be(xmm3) */
+ "pshufb %%xmm6, %%xmm4\n\t" /* xmm4 := be(xmm4) */
+ "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */
+ "pxor %%xmm1, %%xmm2\n\t" /* xmm2 ^= key[0] */
+ "pxor %%xmm1, %%xmm3\n\t" /* xmm3 ^= key[0] */
+ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 ^= key[0] */
+ "aesenc %%xmm7, %%xmm0\n\t"
+ "aesenc %%xmm7, %%xmm2\n\t"
+ "aesenc %%xmm7, %%xmm3\n\t"
+ "aesenc %%xmm7, %%xmm4\n\t"
+ "pshufb %%xmm6, %%xmm8\n\t" /* xmm8 := be(xmm8) */
+ "pshufb %%xmm6, %%xmm9\n\t" /* xmm9 := be(xmm9) */
+ "pshufb %%xmm6, %%xmm10\n\t" /* xmm10 := be(xmm10) */
+ "pshufb %%xmm6, %%xmm11\n\t" /* xmm11 := be(xmm11) */
+ "pxor %%xmm1, %%xmm8\n\t" /* xmm8 ^= key[0] */
+ "pxor %%xmm1, %%xmm9\n\t" /* xmm9 ^= key[0] */
+ "pxor %%xmm1, %%xmm10\n\t" /* xmm10 ^= key[0] */
+ "pxor %%xmm1, %%xmm11\n\t" /* xmm11 ^= key[0] */
+ "aesenc %%xmm7, %%xmm8\n\t"
+ "aesenc %%xmm7, %%xmm9\n\t"
+ "aesenc %%xmm7, %%xmm10\n\t"
+ "aesenc %%xmm7, %%xmm11\n\t"
+
+ "pshufb %%xmm6, %%xmm5\n\t" /* xmm5 := be(xmm5) */
+ "movdqa %%xmm5, (%[ctr])\n\t" /* Update CTR (mem). */
+
+ ".align 16\n\t"
+ ".Ldone_ctr%=:\n\t"
+ :
+ : [ctr] "r" (ctr),
+ [key] "r" (ctx->keyschenc),
+ [addb] "r" (bige_addb)
+ : "%esi", "cc", "memory");
+
+ asm volatile ("movdqa 0x20(%[key]), %%xmm1\n\t"
+ "movdqu 0*16(%[src]), %%xmm12\n\t" /* Get block 1. */
+ "movdqu 1*16(%[src]), %%xmm13\n\t" /* Get block 2. */
+ "movdqu 2*16(%[src]), %%xmm14\n\t" /* Get block 3. */
+ "movdqu 3*16(%[src]), %%xmm15\n\t" /* Get block 4. */
+ "movdqu 4*16(%[src]), %%xmm7\n\t" /* Get block 5. */
+ "aesenc %%xmm1, %%xmm0\n\t"
+ "aesenc %%xmm1, %%xmm2\n\t"
+ "aesenc %%xmm1, %%xmm3\n\t"
+ "aesenc %%xmm1, %%xmm4\n\t"
+ "aesenc %%xmm1, %%xmm8\n\t"
+ "aesenc %%xmm1, %%xmm9\n\t"
+ "aesenc %%xmm1, %%xmm10\n\t"
+ "aesenc %%xmm1, %%xmm11\n\t"
+ "cmpl $12, %[rounds]\n\t"
+ "movdqa 0x30(%[key]), %%xmm1\n\t"
+ "aesenc %%xmm1, %%xmm0\n\t"
+ "aesenc %%xmm1, %%xmm2\n\t"
+ "aesenc %%xmm1, %%xmm3\n\t"
+ "aesenc %%xmm1, %%xmm4\n\t"
+ "aesenc %%xmm1, %%xmm8\n\t"
+ "aesenc %%xmm1, %%xmm9\n\t"
+ "aesenc %%xmm1, %%xmm10\n\t"
+ "aesenc %%xmm1, %%xmm11\n\t"
+ "movdqa 0x40(%[key]), %%xmm1\n\t"
+ "aesenc %%xmm1, %%xmm0\n\t"
+ "aesenc %%xmm1, %%xmm2\n\t"
+ "aesenc %%xmm1, %%xmm3\n\t"
+ "aesenc %%xmm1, %%xmm4\n\t"
+ "aesenc %%xmm1, %%xmm8\n\t"
+ "aesenc %%xmm1, %%xmm9\n\t"
+ "aesenc %%xmm1, %%xmm10\n\t"
+ "aesenc %%xmm1, %%xmm11\n\t"
+ "movdqa 0x50(%[key]), %%xmm1\n\t"
+ "aesenc %%xmm1, %%xmm0\n\t"
+ "aesenc %%xmm1, %%xmm2\n\t"
+ "aesenc %%xmm1, %%xmm3\n\t"
+ "aesenc %%xmm1, %%xmm4\n\t"
+ "aesenc %%xmm1, %%xmm8\n\t"
+ "aesenc %%xmm1, %%xmm9\n\t"
+ "aesenc %%xmm1, %%xmm10\n\t"
+ "aesenc %%xmm1, %%xmm11\n\t"
+ "movdqa 0x60(%[key]), %%xmm1\n\t"
+ "aesenc %%xmm1, %%xmm0\n\t"
+ "aesenc %%xmm1, %%xmm2\n\t"
+ "aesenc %%xmm1, %%xmm3\n\t"
+ "aesenc %%xmm1, %%xmm4\n\t"
+ "aesenc %%xmm1, %%xmm8\n\t"
+ "aesenc %%xmm1, %%xmm9\n\t"
+ "aesenc %%xmm1, %%xmm10\n\t"
+ "aesenc %%xmm1, %%xmm11\n\t"
+ "movdqa 0x70(%[key]), %%xmm1\n\t"
+ "aesenc %%xmm1, %%xmm0\n\t"
+ "aesenc %%xmm1, %%xmm2\n\t"
+ "aesenc %%xmm1, %%xmm3\n\t"
+ "aesenc %%xmm1, %%xmm4\n\t"
+ "aesenc %%xmm1, %%xmm8\n\t"
+ "aesenc %%xmm1, %%xmm9\n\t"
+ "aesenc %%xmm1, %%xmm10\n\t"
+ "aesenc %%xmm1, %%xmm11\n\t"
+ "movdqa 0x80(%[key]), %%xmm1\n\t"
+ "aesenc %%xmm1, %%xmm0\n\t"
+ "aesenc %%xmm1, %%xmm2\n\t"
+ "aesenc %%xmm1, %%xmm3\n\t"
+ "aesenc %%xmm1, %%xmm4\n\t"
+ "aesenc %%xmm1, %%xmm8\n\t"
+ "aesenc %%xmm1, %%xmm9\n\t"
+ "aesenc %%xmm1, %%xmm10\n\t"
+ "aesenc %%xmm1, %%xmm11\n\t"
+ "movdqa 0x90(%[key]), %%xmm1\n\t"
+ "aesenc %%xmm1, %%xmm0\n\t"
+ "aesenc %%xmm1, %%xmm2\n\t"
+ "aesenc %%xmm1, %%xmm3\n\t"
+ "aesenc %%xmm1, %%xmm4\n\t"
+ "aesenc %%xmm1, %%xmm8\n\t"
+ "aesenc %%xmm1, %%xmm9\n\t"
+ "aesenc %%xmm1, %%xmm10\n\t"
+ "aesenc %%xmm1, %%xmm11\n\t"
+ "movdqa 0xa0(%[key]), %%xmm1\n\t"
+ "jb .Lenclast%=\n\t"
+ "aesenc %%xmm1, %%xmm0\n\t"
+ "aesenc %%xmm1, %%xmm2\n\t"
+ "aesenc %%xmm1, %%xmm3\n\t"
+ "aesenc %%xmm1, %%xmm4\n\t"
+ "aesenc %%xmm1, %%xmm8\n\t"
+ "aesenc %%xmm1, %%xmm9\n\t"
+ "aesenc %%xmm1, %%xmm10\n\t"
+ "aesenc %%xmm1, %%xmm11\n\t"
+ "movdqa 0xb0(%[key]), %%xmm1\n\t"
+ "aesenc %%xmm1, %%xmm0\n\t"
+ "aesenc %%xmm1, %%xmm2\n\t"
+ "aesenc %%xmm1, %%xmm3\n\t"
+ "aesenc %%xmm1, %%xmm4\n\t"
+ "aesenc %%xmm1, %%xmm8\n\t"
+ "aesenc %%xmm1, %%xmm9\n\t"
+ "aesenc %%xmm1, %%xmm10\n\t"
+ "aesenc %%xmm1, %%xmm11\n\t"
+ "movdqa 0xc0(%[key]), %%xmm1\n\t"
+ "je .Lenclast%=\n\t"
+ "aesenc %%xmm1, %%xmm0\n\t"
+ "aesenc %%xmm1, %%xmm2\n\t"
+ "aesenc %%xmm1, %%xmm3\n\t"
+ "aesenc %%xmm1, %%xmm4\n\t"
+ "aesenc %%xmm1, %%xmm8\n\t"
+ "aesenc %%xmm1, %%xmm9\n\t"
+ "aesenc %%xmm1, %%xmm10\n\t"
+ "aesenc %%xmm1, %%xmm11\n\t"
+ "movdqa 0xd0(%[key]), %%xmm1\n\t"
+ "aesenc %%xmm1, %%xmm0\n\t"
+ "aesenc %%xmm1, %%xmm2\n\t"
+ "aesenc %%xmm1, %%xmm3\n\t"
+ "aesenc %%xmm1, %%xmm4\n\t"
+ "aesenc %%xmm1, %%xmm8\n\t"
+ "aesenc %%xmm1, %%xmm9\n\t"
+ "aesenc %%xmm1, %%xmm10\n\t"
+ "aesenc %%xmm1, %%xmm11\n\t"
+ "movdqa 0xe0(%[key]), %%xmm1\n"
+
+ ".Lenclast%=:\n\t"
+ :
+ : [key] "r" (ctx->keyschenc),
+ [rounds] "r" (ctx->rounds),
+ [src] "r" (a)
+ : "cc", "memory");
+
+ asm volatile ("pxor %%xmm1, %%xmm12\n\t" /* block1 ^= lastkey */
+ "pxor %%xmm1, %%xmm13\n\t" /* block2 ^= lastkey */
+ "pxor %%xmm1, %%xmm14\n\t" /* block3 ^= lastkey */
+ "pxor %%xmm1, %%xmm15\n\t" /* block4 ^= lastkey */
+ "aesenclast %%xmm12, %%xmm0\n\t"
+ "aesenclast %%xmm13, %%xmm2\n\t"
+ "aesenclast %%xmm14, %%xmm3\n\t"
+ "aesenclast %%xmm15, %%xmm4\n\t"
+ "movdqu 5*16(%[src]), %%xmm12\n\t" /* Get block 6. */
+ "movdqu 6*16(%[src]), %%xmm13\n\t" /* Get block 7. */
+ "movdqu 7*16(%[src]), %%xmm14\n\t" /* Get block 8. */
+ "movdqu %%xmm0, 0*16(%[dst])\n\t" /* Store block 1. */
+ "movdqu %%xmm2, 1*16(%[dst])\n\t" /* Store block 2. */
+ "movdqu %%xmm3, 2*16(%[dst])\n\t" /* Store block 3. */
+ "movdqu %%xmm4, 3*16(%[dst])\n\t" /* Store block 4. */
+ "pxor %%xmm1, %%xmm7\n\t" /* block5 ^= lastkey */
+ "pxor %%xmm1, %%xmm12\n\t" /* block6 ^= lastkey */
+ "pxor %%xmm1, %%xmm13\n\t" /* block7 ^= lastkey */
+ "pxor %%xmm1, %%xmm14\n\t" /* block8 ^= lastkey */
+ "aesenclast %%xmm7, %%xmm8\n\t"
+ "aesenclast %%xmm12, %%xmm9\n\t"
+ "aesenclast %%xmm13, %%xmm10\n\t"
+ "aesenclast %%xmm14, %%xmm11\n\t"
+ "movdqu %%xmm8, 4*16(%[dst])\n\t" /* Store block 8. */
+ "movdqu %%xmm9, 5*16(%[dst])\n\t" /* Store block 9. */
+ "movdqu %%xmm10, 6*16(%[dst])\n\t" /* Store block 10. */
+ "movdqu %%xmm11, 7*16(%[dst])\n\t" /* Store block 11. */
+ :
+ : [src] "r" (a),
+ [dst] "r" (b)
+ : "memory");
+}
+
+#endif /* __x86_64__ */
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
+ const unsigned char *src)
+{
+ aesni_prepare ();
+ asm volatile ("movdqu %[src], %%xmm0\n\t"
+ :
+ : [src] "m" (*src)
+ : "memory" );
+ do_aesni_enc (ctx);
+ asm volatile ("movdqu %%xmm0, %[dst]\n\t"
+ : [dst] "=m" (*dst)
+ :
+ : "memory" );
+ aesni_cleanup ();
+ return 0;
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ aesni_prepare ();
+
+ asm volatile ("movdqu %[iv], %%xmm0\n\t"
+ : /* No output */
+ : [iv] "m" (*iv)
+ : "memory" );
+
+ for ( ;nblocks; nblocks-- )
+ {
+ do_aesni_enc (ctx);
+
+ asm volatile ("movdqu %[inbuf], %%xmm1\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ : [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
+ }
+
+ asm volatile ("movdqu %%xmm0, %[iv]\n\t"
+ : [iv] "=m" (*iv)
+ :
+ : "memory" );
+
+ aesni_cleanup ();
+}
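+
+/* CFB encryption is inherently serial; for illustration the loop above
+   computes (sketch only):
+
+     for (i = 0; i < nblocks; i++)
+       iv = C[i] = E_K(iv) ^ P[i];
+ */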
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks, int cbc_mac)
+{
+ aesni_prepare_2_7_variable;
+
+ aesni_prepare ();
+ aesni_prepare_2_7();
+
+ asm volatile ("movdqu %[iv], %%xmm5\n\t"
+ : /* No output */
+ : [iv] "m" (*iv)
+ : "memory" );
+
+ for ( ;nblocks; nblocks-- )
+ {
+ asm volatile ("movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ : /* No output */
+ : [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ do_aesni_enc (ctx);
+
+ asm volatile ("movdqa %%xmm0, %%xmm5\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ inbuf += BLOCKSIZE;
+ if (!cbc_mac)
+ outbuf += BLOCKSIZE;
+ }
+
+ asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+ : [iv] "=m" (*iv)
+ :
+ : "memory" );
+
+ aesni_cleanup ();
+ aesni_cleanup_2_7 ();
+}
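+
+/* For illustration, the loop above computes (sketch only):
+
+     for (i = 0; i < nblocks; i++)
+       iv = C[i] = E_K(P[i] ^ iv);
+
+   In CBC-MAC mode every block is still written, but outbuf is not
+   advanced, so only the final block (the MAC) survives. */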
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+ aesni_prepare_2_7_variable;
+
+ aesni_prepare ();
+ aesni_prepare_2_7();
+
+ asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */
+ "movdqa %[ctr], %%xmm5\n\t" /* Preload CTR */
+ : /* No output */
+ : [mask] "m" (*be_mask),
+ [ctr] "m" (*ctr)
+ : "memory");
+
+#ifdef __x86_64__
+ if (nblocks >= 8)
+ {
+ aesni_prepare_8_15_variable;
+
+ aesni_prepare_8_15();
+
+ for ( ;nblocks >= 8 ; nblocks -= 8 )
+ {
+ do_aesni_ctr_8 (ctx, ctr, outbuf, inbuf);
+ outbuf += 8*BLOCKSIZE;
+ inbuf += 8*BLOCKSIZE;
+ }
+
+ aesni_cleanup_8_15();
+ }
+#endif
+
+ for ( ;nblocks >= 4 ; nblocks -= 4 )
+ {
+ do_aesni_ctr_4 (ctx, ctr, outbuf, inbuf);
+ outbuf += 4*BLOCKSIZE;
+ inbuf += 4*BLOCKSIZE;
+ }
+ for ( ;nblocks; nblocks-- )
+ {
+ do_aesni_ctr (ctx, ctr, outbuf, inbuf);
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
+ }
+ aesni_cleanup ();
+ aesni_cleanup_2_7 ();
+}
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
+ const unsigned char *src)
+{
+ aesni_prepare ();
+ asm volatile ("movdqu %[src], %%xmm0\n\t"
+ :
+ : [src] "m" (*src)
+ : "memory" );
+ do_aesni_dec (ctx);
+ asm volatile ("movdqu %%xmm0, %[dst]\n\t"
+ : [dst] "=m" (*dst)
+ :
+ : "memory" );
+ aesni_cleanup ();
+ return 0;
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ aesni_prepare_2_7_variable;
+
+ aesni_prepare ();
+ aesni_prepare_2_7();
+
+ asm volatile ("movdqu %[iv], %%xmm6\n\t"
+ : /* No output */
+ : [iv] "m" (*iv)
+ : "memory" );
+
+ /* CFB decryption can be parallelized */
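+/* Decryption only feeds ciphertext into the block cipher:
+     P[i] = E_K(C[i-1]) ^ C[i]     (with C[-1] = IV)
+   and every C[i-1] is already known, so the 8-way and 4-way paths
+   below can run the E_K() computations in parallel. */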
+
+#ifdef __x86_64__
+ if (nblocks >= 8)
+ {
+ aesni_prepare_8_15_variable;
+
+ aesni_prepare_8_15();
+
+ for ( ;nblocks >= 8; nblocks -= 8)
+ {
+ asm volatile
+ ("movdqa (%[key]), %%xmm0\n\t"
+
+ "movdqu %%xmm6, %%xmm1\n\t" /* load input blocks */
+ "movdqu 0*16(%[inbuf]), %%xmm2\n\t"
+ "movdqu 1*16(%[inbuf]), %%xmm3\n\t"
+ "movdqu 2*16(%[inbuf]), %%xmm4\n\t"
+ "movdqu 3*16(%[inbuf]), %%xmm8\n\t"
+ "movdqu 4*16(%[inbuf]), %%xmm9\n\t"
+ "movdqu 5*16(%[inbuf]), %%xmm10\n\t"
+ "movdqu 6*16(%[inbuf]), %%xmm11\n\t"
+
+ "movdqu 7*16(%[inbuf]), %%xmm6\n\t" /* update IV */
+
+ "movdqa %%xmm2, %%xmm12\n\t"
+ "movdqa %%xmm3, %%xmm13\n\t"
+ "movdqa %%xmm4, %%xmm14\n\t"
+ "movdqa %%xmm8, %%xmm15\n\t"
+
+ "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */
+ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */
+ "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */
+ "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */
+ "pxor %%xmm0, %%xmm8\n\t" /* xmm8 ^= key[0] */
+ "pxor %%xmm0, %%xmm9\n\t" /* xmm9 ^= key[0] */
+ "pxor %%xmm0, %%xmm10\n\t" /* xmm10 ^= key[0] */
+ "pxor %%xmm0, %%xmm11\n\t" /* xmm11 ^= key[0] */
+ : /* No output */
+ : [inbuf] "r" (inbuf),
+ [key] "r" (ctx->keyschenc)
+ : "memory");
+
+ do_aesni_enc_vec8 (ctx);
+
+ asm volatile
+ (
+ "pxor %%xmm0, %%xmm12\n\t"
+ "pxor %%xmm0, %%xmm13\n\t"
+ "pxor %%xmm0, %%xmm14\n\t"
+ "pxor %%xmm0, %%xmm15\n\t"
+ "aesenclast %%xmm12, %%xmm1\n\t"
+ "aesenclast %%xmm13, %%xmm2\n\t"
+ "aesenclast %%xmm14, %%xmm3\n\t"
+ "aesenclast %%xmm15, %%xmm4\n\t"
+
+ "movdqu 4*16(%[inbuf]), %%xmm12\n\t"
+ "movdqu 5*16(%[inbuf]), %%xmm13\n\t"
+ "movdqu 6*16(%[inbuf]), %%xmm14\n\t"
+ "movdqu 7*16(%[inbuf]), %%xmm15\n\t"
+ "pxor %%xmm0, %%xmm12\n\t"
+ "pxor %%xmm0, %%xmm13\n\t"
+ "pxor %%xmm0, %%xmm14\n\t"
+ "pxor %%xmm0, %%xmm15\n\t"
+
+ "aesenclast %%xmm12, %%xmm8\n\t"
+ "aesenclast %%xmm13, %%xmm9\n\t"
+ "aesenclast %%xmm14, %%xmm10\n\t"
+ "aesenclast %%xmm15, %%xmm11\n\t"
+
+ "movdqu %%xmm1, 0*16(%[outbuf])\n\t"
+ "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+ "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+ "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+
+ "movdqu %%xmm8, 4*16(%[outbuf])\n\t"
+ "movdqu %%xmm9, 5*16(%[outbuf])\n\t"
+ "movdqu %%xmm10, 6*16(%[outbuf])\n\t"
+ "movdqu %%xmm11, 7*16(%[outbuf])\n\t"
+
+ : /* No output */
+ : [inbuf] "r" (inbuf),
+ [outbuf] "r" (outbuf)
+ : "memory");
+
+ outbuf += 8*BLOCKSIZE;
+ inbuf += 8*BLOCKSIZE;
+ }
+
+ aesni_cleanup_8_15();
+ }
+#endif
+
+ for ( ;nblocks >= 4; nblocks -= 4)
+ {
+ asm volatile
+ ("movdqu %%xmm6, %%xmm1\n\t" /* load input blocks */
+ "movdqu 0*16(%[inbuf]), %%xmm2\n\t"
+ "movdqu 1*16(%[inbuf]), %%xmm3\n\t"
+ "movdqu 2*16(%[inbuf]), %%xmm4\n\t"
+
+ "movdqu 3*16(%[inbuf]), %%xmm6\n\t" /* update IV */
+ : /* No output */
+ : [inbuf] "r" (inbuf)
+ : "memory");
+
+ do_aesni_enc_vec4 (ctx);
+
+ asm volatile
+ ("movdqu 0*16(%[inbuf]), %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+ "movdqu %%xmm1, 0*16(%[outbuf])\n\t"
+
+ "movdqu 1*16(%[inbuf]), %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+ "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+
+ "movdqu 2*16(%[inbuf]), %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+
+ "movdqu 3*16(%[inbuf]), %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+
+ : /* No output */
+ : [inbuf] "r" (inbuf),
+ [outbuf] "r" (outbuf)
+ : "memory");
+
+ outbuf += 4*BLOCKSIZE;
+ inbuf += 4*BLOCKSIZE;
+ }
+
+ asm volatile ("movdqu %%xmm6, %%xmm0\n\t" ::: "cc");
+
+ for ( ;nblocks; nblocks-- )
+ {
+ do_aesni_enc (ctx);
+
+ asm volatile ("movdqa %%xmm0, %%xmm6\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm6\n\t"
+ "movdqu %%xmm6, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ : [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
+ }
+
+ asm volatile ("movdqu %%xmm0, %[iv]\n\t"
+ : [iv] "=m" (*iv)
+ :
+ : "memory" );
+
+ aesni_cleanup ();
+ aesni_cleanup_2_7 ();
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ aesni_prepare_2_7_variable;
+
+ aesni_prepare ();
+ aesni_prepare_2_7();
+
+ if ( !ctx->decryption_prepared )
+ {
+ do_aesni_prepare_decryption ( ctx );
+ ctx->decryption_prepared = 1;
+ }
+
+ asm volatile
+ ("movdqu %[iv], %%xmm5\n\t" /* use xmm5 as fast IV storage */
+ : /* No output */
+ : [iv] "m" (*iv)
+ : "memory");
+
+#ifdef __x86_64__
+ if (nblocks >= 8)
+ {
+ aesni_prepare_8_15_variable;
+
+ aesni_prepare_8_15();
+
+ for ( ;nblocks >= 8 ; nblocks -= 8 )
+ {
+ asm volatile
+ ("movdqa (%[key]), %%xmm0\n\t"
+
+ "movdqu 0*16(%[inbuf]), %%xmm1\n\t" /* load input blocks */
+ "movdqu 1*16(%[inbuf]), %%xmm2\n\t"
+ "movdqu 2*16(%[inbuf]), %%xmm3\n\t"
+ "movdqu 3*16(%[inbuf]), %%xmm4\n\t"
+ "movdqu 4*16(%[inbuf]), %%xmm8\n\t"
+ "movdqu 5*16(%[inbuf]), %%xmm9\n\t"
+ "movdqu 6*16(%[inbuf]), %%xmm10\n\t"
+ "movdqu 7*16(%[inbuf]), %%xmm11\n\t"
+
+ "movdqa %%xmm1, %%xmm12\n\t"
+ "movdqa %%xmm2, %%xmm13\n\t"
+ "movdqa %%xmm3, %%xmm14\n\t"
+ "movdqa %%xmm4, %%xmm15\n\t"
+
+ "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */
+ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */
+ "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */
+ "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */
+ "pxor %%xmm0, %%xmm8\n\t" /* xmm8 ^= key[0] */
+ "pxor %%xmm0, %%xmm9\n\t" /* xmm9 ^= key[0] */
+ "pxor %%xmm0, %%xmm10\n\t" /* xmm10 ^= key[0] */
+ "pxor %%xmm0, %%xmm11\n\t" /* xmm11 ^= key[0] */
+
+ : /* No output */
+ : [inbuf] "r" (inbuf),
+ [key] "r" (ctx->keyschdec)
+ : "memory");
+
+ do_aesni_dec_vec8 (ctx);
+
+ asm volatile
+ (
+ "pxor %%xmm0, %%xmm5\n\t" /* xor IV with key */
+ "pxor %%xmm0, %%xmm12\n\t" /* xor IV with key */
+ "pxor %%xmm0, %%xmm13\n\t" /* xor IV with key */
+ "pxor %%xmm0, %%xmm14\n\t" /* xor IV with key */
+ "pxor %%xmm0, %%xmm15\n\t" /* xor IV with key */
+
+ "aesdeclast %%xmm5, %%xmm1\n\t"
+ "aesdeclast %%xmm12, %%xmm2\n\t"
+ "aesdeclast %%xmm13, %%xmm3\n\t"
+ "aesdeclast %%xmm14, %%xmm4\n\t"
+
+ "movdqu 4*16(%[inbuf]), %%xmm12\n\t"
+ "movdqu 5*16(%[inbuf]), %%xmm13\n\t"
+ "movdqu 6*16(%[inbuf]), %%xmm14\n\t"
+ "movdqu 7*16(%[inbuf]), %%xmm5\n\t"
+ "pxor %%xmm0, %%xmm12\n\t" /* xor IV with key */
+ "pxor %%xmm0, %%xmm13\n\t" /* xor IV with key */
+ "pxor %%xmm0, %%xmm14\n\t" /* xor IV with key */
+
+ "aesdeclast %%xmm15, %%xmm8\n\t"
+ "aesdeclast %%xmm12, %%xmm9\n\t"
+ "aesdeclast %%xmm13, %%xmm10\n\t"
+ "aesdeclast %%xmm14, %%xmm11\n\t"
+
+ "movdqu %%xmm1, 0*16(%[outbuf])\n\t"
+ "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+ "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+ "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+ "movdqu %%xmm8, 4*16(%[outbuf])\n\t"
+ "movdqu %%xmm9, 5*16(%[outbuf])\n\t"
+ "movdqu %%xmm10, 6*16(%[outbuf])\n\t"
+ "movdqu %%xmm11, 7*16(%[outbuf])\n\t"
+
+ : /* No output */
+ : [inbuf] "r" (inbuf),
+ [outbuf] "r" (outbuf)
+ : "memory");
+
+ outbuf += 8*BLOCKSIZE;
+ inbuf += 8*BLOCKSIZE;
+ }
+
+ aesni_cleanup_8_15();
+ }
+#endif
+
+ for ( ;nblocks >= 4 ; nblocks -= 4 )
+ {
+ asm volatile
+ ("movdqu 0*16(%[inbuf]), %%xmm1\n\t" /* load input blocks */
+ "movdqu 1*16(%[inbuf]), %%xmm2\n\t"
+ "movdqu 2*16(%[inbuf]), %%xmm3\n\t"
+ "movdqu 3*16(%[inbuf]), %%xmm4\n\t"
+ : /* No output */
+ : [inbuf] "r" (inbuf)
+ : "memory");
+
+ do_aesni_dec_vec4 (ctx);
+
+ asm volatile
+ ("pxor %%xmm5, %%xmm1\n\t" /* xor IV with output */
+ "movdqu 0*16(%[inbuf]), %%xmm5\n\t" /* load new IV */
+ "movdqu %%xmm1, 0*16(%[outbuf])\n\t"
+
+ "pxor %%xmm5, %%xmm2\n\t" /* xor IV with output */
+ "movdqu 1*16(%[inbuf]), %%xmm5\n\t" /* load new IV */
+ "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+
+ "pxor %%xmm5, %%xmm3\n\t" /* xor IV with output */
+ "movdqu 2*16(%[inbuf]), %%xmm5\n\t" /* load new IV */
+ "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+
+ "pxor %%xmm5, %%xmm4\n\t" /* xor IV with output */
+ "movdqu 3*16(%[inbuf]), %%xmm5\n\t" /* load new IV */
+ "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+
+ : /* No output */
+ : [inbuf] "r" (inbuf),
+ [outbuf] "r" (outbuf)
+ : "memory");
+
+ outbuf += 4*BLOCKSIZE;
+ inbuf += 4*BLOCKSIZE;
+ }
+
+ for ( ;nblocks; nblocks-- )
+ {
+ asm volatile
+ ("movdqu %[inbuf], %%xmm0\n\t"
+ "movdqa %%xmm0, %%xmm2\n\t" /* use xmm2 as savebuf */
+ : /* No output */
+ : [inbuf] "m" (*inbuf)
+ : "memory");
+
+ /* uses only xmm0 and xmm1 */
+ do_aesni_dec (ctx);
+
+ asm volatile
+ ("pxor %%xmm5, %%xmm0\n\t" /* xor IV with output */
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ "movdqu %%xmm2, %%xmm5\n\t" /* store savebuf as new IV */
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory");
+
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
+ }
+
+ asm volatile
+ ("movdqu %%xmm5, %[iv]\n\t" /* store IV */
+ : /* No output */
+ : [iv] "m" (*iv)
+ : "memory");
+
+ aesni_cleanup ();
+ aesni_cleanup_2_7 ();
+}
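+
+/* For illustration, the function above computes (sketch only):
+
+     for (i = 0; i < nblocks; i++)
+       {
+         P[i] = D_K(C[i]) ^ iv;   D_K uses the Equivalent Inverse Cipher keys
+         iv = C[i];
+       }
+
+   D_K() depends only on C[i], which is why the 8-way and 4-way paths
+   can decrypt blocks in parallel. */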
+
+
+static ASM_FUNC_ATTR_INLINE void
+aesni_ocb_checksum (gcry_cipher_hd_t c, const unsigned char *plaintext,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+
+ /* Calculate checksum */
+ asm volatile ("movdqu %[checksum], %%xmm6\n\t"
+ "pxor %%xmm1, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm2\n\t"
+ "pxor %%xmm3, %%xmm3\n\t"
+ :
+ :[checksum] "m" (*c->u_ctr.ctr)
+ : "memory" );
+
+ if (0) {}
+#if defined(HAVE_GCC_INLINE_ASM_AVX2)
+ else if (nblocks >= 16 && ctx->use_avx2)
+ {
+ /* Use wider 256-bit registers for fast xoring of plaintext. */
+ asm volatile ("vzeroupper\n\t"
+ "vpxor %%xmm0, %%xmm0, %%xmm0\n\t"
+ "vpxor %%xmm4, %%xmm4, %%xmm4\n\t"
+ "vpxor %%xmm5, %%xmm5, %%xmm5\n\t"
+ "vpxor %%xmm7, %%xmm7, %%xmm7\n\t"
+ :
+ :
+ : "memory");
+
+ for (;nblocks >= 16; nblocks -= 16)
+ {
+ asm volatile ("vpxor %[ptr0], %%ymm6, %%ymm6\n\t"
+ "vpxor %[ptr1], %%ymm1, %%ymm1\n\t"
+ "vpxor %[ptr2], %%ymm2, %%ymm2\n\t"
+ "vpxor %[ptr3], %%ymm3, %%ymm3\n\t"
+ :
+ : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)),
+ [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)),
+ [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)),
+ [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2))
+ : "memory" );
+ asm volatile ("vpxor %[ptr4], %%ymm0, %%ymm0\n\t"
+ "vpxor %[ptr5], %%ymm4, %%ymm4\n\t"
+ "vpxor %[ptr6], %%ymm5, %%ymm5\n\t"
+ "vpxor %[ptr7], %%ymm7, %%ymm7\n\t"
+ :
+ : [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)),
+ [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)),
+ [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)),
+ [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2))
+ : "memory" );
+ plaintext += BLOCKSIZE * 16;
+ }
+
+ asm volatile ("vpxor %%ymm0, %%ymm6, %%ymm6\n\t"
+ "vpxor %%ymm4, %%ymm1, %%ymm1\n\t"
+ "vpxor %%ymm5, %%ymm2, %%ymm2\n\t"
+ "vpxor %%ymm7, %%ymm3, %%ymm3\n\t"
+ "vextracti128 $1, %%ymm6, %%xmm0\n\t"
+ "vextracti128 $1, %%ymm1, %%xmm4\n\t"
+ "vextracti128 $1, %%ymm2, %%xmm5\n\t"
+ "vextracti128 $1, %%ymm3, %%xmm7\n\t"
+ "vpxor %%xmm0, %%xmm6, %%xmm6\n\t"
+ "vpxor %%xmm4, %%xmm1, %%xmm1\n\t"
+ "vpxor %%xmm5, %%xmm2, %%xmm2\n\t"
+ "vpxor %%xmm7, %%xmm3, %%xmm3\n\t"
+ "vzeroupper\n\t"
+ :
+ :
+ : "memory" );
+ }
+#endif
+#if defined(HAVE_GCC_INLINE_ASM_AVX)
+ else if (nblocks >= 16 && ctx->use_avx)
+ {
+ /* Same as AVX2, except using 256-bit floating point instructions. */
+ asm volatile ("vzeroupper\n\t"
+ "vxorpd %%xmm0, %%xmm0, %%xmm0\n\t"
+ "vxorpd %%xmm4, %%xmm4, %%xmm4\n\t"
+ "vxorpd %%xmm5, %%xmm5, %%xmm5\n\t"
+ "vxorpd %%xmm7, %%xmm7, %%xmm7\n\t"
+ :
+ :
+ : "memory");
+
+ for (;nblocks >= 16; nblocks -= 16)
+ {
+ asm volatile ("vxorpd %[ptr0], %%ymm6, %%ymm6\n\t"
+ "vxorpd %[ptr1], %%ymm1, %%ymm1\n\t"
+ "vxorpd %[ptr2], %%ymm2, %%ymm2\n\t"
+ "vxorpd %[ptr3], %%ymm3, %%ymm3\n\t"
+ :
+ : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)),
+ [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)),
+ [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)),
+ [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2))
+ : "memory" );
+ asm volatile ("vxorpd %[ptr4], %%ymm0, %%ymm0\n\t"
+ "vxorpd %[ptr5], %%ymm4, %%ymm4\n\t"
+ "vxorpd %[ptr6], %%ymm5, %%ymm5\n\t"
+ "vxorpd %[ptr7], %%ymm7, %%ymm7\n\t"
+ :
+ : [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)),
+ [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)),
+ [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)),
+ [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2))
+ : "memory" );
+ plaintext += BLOCKSIZE * 16;
+ }
+
+ asm volatile ("vxorpd %%ymm0, %%ymm6, %%ymm6\n\t"
+ "vxorpd %%ymm4, %%ymm1, %%ymm1\n\t"
+ "vxorpd %%ymm5, %%ymm2, %%ymm2\n\t"
+ "vxorpd %%ymm7, %%ymm3, %%ymm3\n\t"
+ "vextractf128 $1, %%ymm6, %%xmm0\n\t"
+ "vextractf128 $1, %%ymm1, %%xmm4\n\t"
+ "vextractf128 $1, %%ymm2, %%xmm5\n\t"
+ "vextractf128 $1, %%ymm3, %%xmm7\n\t"
+ "vxorpd %%xmm0, %%xmm6, %%xmm6\n\t"
+ "vxorpd %%xmm4, %%xmm1, %%xmm1\n\t"
+ "vxorpd %%xmm5, %%xmm2, %%xmm2\n\t"
+ "vxorpd %%xmm7, %%xmm3, %%xmm3\n\t"
+ "vzeroupper\n\t"
+ :
+ :
+ : "memory" );
+ }
+#endif
+
+ for (;nblocks >= 4; nblocks -= 4)
+ {
+ asm volatile ("movdqu %[ptr0], %%xmm0\n\t"
+ "movdqu %[ptr1], %%xmm4\n\t"
+ "movdqu %[ptr2], %%xmm5\n\t"
+ "movdqu %[ptr3], %%xmm7\n\t"
+ "pxor %%xmm0, %%xmm6\n\t"
+ "pxor %%xmm4, %%xmm1\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+ "pxor %%xmm7, %%xmm3\n\t"
+ :
+ : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE)),
+ [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE)),
+ [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE)),
+ [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE))
+ : "memory" );
+ plaintext += BLOCKSIZE * 4;
+ }
+
+ for (;nblocks >= 1; nblocks -= 1)
+ {
+ asm volatile ("movdqu %[ptr0], %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm6\n\t"
+ :
+ : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE))
+ : "memory" );
+ plaintext += BLOCKSIZE;
+ }
+
+ asm volatile ("pxor %%xmm1, %%xmm6\n\t"
+ "pxor %%xmm2, %%xmm6\n\t"
+ "pxor %%xmm3, %%xmm6\n\t"
+ "movdqu %%xmm6, %[checksum]\n\t"
+ : [checksum] "=m" (*c->u_ctr.ctr)
+ :
+ : "memory" );
+}
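+
+/* For illustration: the OCB checksum is simply the XOR of all
+   plaintext blocks,
+
+     Checksum = P[0] ^ P[1] ^ ... ^ P[n-1],
+
+   and the code above only spreads the XOR over several registers (and
+   over YMM lanes when AVX/AVX2 is available), folding them together at
+   the end. */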
+
+
+static unsigned int ASM_FUNC_ATTR_NOINLINE
+aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ u64 n = c->u_mode.ocb.data_nblocks;
+ const unsigned char *l;
+ byte tmpbuf_store[3 * 16 + 15];
+ byte *tmpbuf;
+ aesni_prepare_2_7_variable;
+
+ asm volatile ("" : "=r" (tmpbuf) : "0" (tmpbuf_store) : "memory");
+ tmpbuf = tmpbuf + (-(uintptr_t)tmpbuf & 15);
+
+ aesni_prepare ();
+ aesni_prepare_2_7 ();
+
+ /* Preload Offset */
+ asm volatile ("movdqu %[iv], %%xmm5\n\t"
+ "movdqu %[ctr], %%xmm7\n\t"
+ : /* No output */
+ : [iv] "m" (*c->u_iv.iv),
+ [ctr] "m" (*c->u_ctr.ctr)
+ : "memory" );
+
+ for ( ;nblocks && n % 4; nblocks-- )
+ {
+ l = aes_ocb_get_l(c, ++n);
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ "pxor %%xmm0, %%xmm7\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ do_aesni_enc (ctx);
+
+ asm volatile ("pxor %%xmm5, %%xmm0\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+
+#ifdef __x86_64__
+ if (nblocks >= 8)
+ {
+ unsigned char last_xor_first_key_store[16 + 15];
+ unsigned char *lxf_key;
+ aesni_prepare_8_15_variable;
+
+ asm volatile (""
+ : "=r" (lxf_key)
+ : "0" (last_xor_first_key_store)
+ : "memory");
+ lxf_key = lxf_key + (-(uintptr_t)lxf_key & 15);
+
+ aesni_prepare_8_15();
+
+ asm volatile ("movdqu %[l0], %%xmm6\n\t"
+ "movdqa %[last_key], %%xmm0\n\t"
+ "pxor %[first_key], %%xmm5\n\t"
+ "pxor %[first_key], %%xmm0\n\t"
+ "movdqa %%xmm0, %[lxfkey]\n\t"
+ : [lxfkey] "=m" (*lxf_key)
+ : [l0] "m" (*c->u_mode.ocb.L[0]),
+ [last_key] "m" (ctx->keyschenc[ctx->rounds][0][0]),
+ [first_key] "m" (ctx->keyschenc[0][0][0])
+ : "memory" );
+
+ for ( ;nblocks >= 8 ; nblocks -= 8 )
+ {
+ n += 4;
+ l = aes_ocb_get_l(c, n);
+
+ asm volatile ("movdqu %[l0l1], %%xmm10\n\t"
+ "movdqu %[l1], %%xmm11\n\t"
+ "movdqu %[l3], %%xmm15\n\t"
+ :
+ : [l0l1] "m" (*c->u_mode.ocb.L0L1),
+ [l1] "m" (*c->u_mode.ocb.L[1]),
+ [l3] "m" (*l)
+ : "memory" );
+
+ n += 4;
+ l = aes_ocb_get_l(c, n);
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+ asm volatile ("movdqu %[inbuf0], %%xmm1\n\t"
+ "movdqu %[inbuf1], %%xmm2\n\t"
+ "movdqu %[inbuf2], %%xmm3\n\t"
+ :
+ : [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)),
+ [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)),
+ [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[inbuf3], %%xmm4\n\t"
+ "movdqu %[inbuf4], %%xmm8\n\t"
+ "movdqu %[inbuf5], %%xmm9\n\t"
+ :
+ : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)),
+ [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)),
+ [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqa %[lxfkey], %%xmm0\n\t"
+ "movdqa %%xmm6, %%xmm12\n\t"
+ "pxor %%xmm5, %%xmm12\n\t"
+ "pxor %%xmm1, %%xmm7\n\t"
+ "pxor %%xmm12, %%xmm1\n\t"
+ "pxor %%xmm0, %%xmm12\n\t"
+
+ "movdqa %%xmm10, %%xmm13\n\t"
+ "pxor %%xmm5, %%xmm13\n\t"
+ "pxor %%xmm2, %%xmm7\n\t"
+ "pxor %%xmm13, %%xmm2\n\t"
+ "pxor %%xmm0, %%xmm13\n\t"
+
+ "movdqa %%xmm11, %%xmm14\n\t"
+ "pxor %%xmm5, %%xmm14\n\t"
+ "pxor %%xmm3, %%xmm7\n\t"
+ "pxor %%xmm14, %%xmm3\n\t"
+ "pxor %%xmm0, %%xmm14\n\t"
+
+ "pxor %%xmm11, %%xmm5\n\t"
+ "pxor %%xmm15, %%xmm5\n\t"
+ "pxor %%xmm4, %%xmm7\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ "movdqa %%xmm5, %%xmm15\n\t"
+ "pxor %%xmm0, %%xmm15\n\t"
+
+ "movdqa %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm6, %%xmm0\n\t"
+ "pxor %%xmm8, %%xmm7\n\t"
+ "pxor %%xmm0, %%xmm8\n\t"
+ "pxor %[lxfkey], %%xmm0\n\t"
+ "movdqa %%xmm0, %[tmpbuf0]\n\t"
+
+ "movdqa %%xmm10, %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm9, %%xmm7\n\t"
+ "pxor %%xmm0, %%xmm9\n\t"
+ "pxor %[lxfkey], %%xmm0\n"
+ "movdqa %%xmm0, %[tmpbuf1]\n\t"
+ : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)),
+ [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE))
+ : [lxfkey] "m" (*lxf_key)
+ : "memory" );
+ asm volatile ("movdqu %[inbuf6], %%xmm10\n\t"
+ "movdqa %%xmm11, %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm10, %%xmm7\n\t"
+ "pxor %%xmm0, %%xmm10\n\t"
+ "pxor %[lxfkey], %%xmm0\n\t"
+ "movdqa %%xmm0, %[tmpbuf2]\n\t"
+ : [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE))
+ : [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE)),
+ [lxfkey] "m" (*lxf_key)
+ : "memory" );
+ asm volatile ("movdqu %[l7], %%xmm0\n\t"
+ "pxor %%xmm11, %%xmm5\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "movdqa 0x10(%[key]), %%xmm0\n\t"
+ "movdqu %[inbuf7], %%xmm11\n\t"
+ "pxor %%xmm11, %%xmm7\n\t"
+ "pxor %%xmm5, %%xmm11\n\t"
+ :
+ : [l7] "m" (*l),
+ [inbuf7] "m" (*(inbuf + 7 * BLOCKSIZE)),
+ [key] "r" (ctx->keyschenc)
+ : "memory" );
+
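+          /* Rounds 1..9 run unconditionally; the comparison against 12 lets
+             the jb/je branches skip the extra rounds for 128-bit and 192-bit
+             keys.  The last round is applied afterwards with aesenclast and
+             the per-block offset values.  */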
+ asm volatile ("cmpl $12, %[rounds]\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x20(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x30(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x40(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x50(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x60(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x70(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x80(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0x90(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "jb .Ldeclast%=\n\t"
+ "movdqa 0xa0(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0xb0(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "je .Ldeclast%=\n\t"
+ "movdqa 0xc0(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+ "movdqa 0xd0(%[key]), %%xmm0\n\t"
+ "aesenc %%xmm0, %%xmm1\n\t"
+ "aesenc %%xmm0, %%xmm2\n\t"
+ "aesenc %%xmm0, %%xmm3\n\t"
+ "aesenc %%xmm0, %%xmm4\n\t"
+ "aesenc %%xmm0, %%xmm8\n\t"
+ "aesenc %%xmm0, %%xmm9\n\t"
+ "aesenc %%xmm0, %%xmm10\n\t"
+ "aesenc %%xmm0, %%xmm11\n\t"
+
+ ".Ldeclast%=:\n\t"
+ :
+ : [key] "r" (ctx->keyschenc),
+ [rounds] "r" (ctx->rounds)
+ : "cc", "memory");
+
+ asm volatile ("aesenclast %%xmm12, %%xmm1\n\t"
+ "aesenclast %%xmm13, %%xmm2\n\t"
+ "aesenclast %%xmm14, %%xmm3\n\t"
+ "aesenclast %%xmm15, %%xmm4\n\t"
+ "aesenclast %[tmpbuf0],%%xmm8\n\t"
+ "aesenclast %[tmpbuf1],%%xmm9\n\t"
+ "aesenclast %[tmpbuf2],%%xmm10\n\t"
+ :
+ : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+ [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
+ [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)),
+ [lxfkey] "m" (*lxf_key)
+ : "memory" );
+ asm volatile ("aesenclast %%xmm5, %%xmm11\n\t"
+ "pxor %[lxfkey], %%xmm11\n\t"
+ "movdqu %%xmm1, %[outbuf0]\n\t"
+ "movdqu %%xmm2, %[outbuf1]\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+ [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+ : [lxfkey] "m" (*lxf_key)
+ : "memory" );
+ asm volatile ("movdqu %%xmm3, %[outbuf2]\n\t"
+ "movdqu %%xmm4, %[outbuf3]\n\t"
+ "movdqu %%xmm8, %[outbuf4]\n\t"
+ : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+ [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)),
+ [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE))
+ :
+ : "memory" );
+ asm volatile ("movdqu %%xmm9, %[outbuf5]\n\t"
+ "movdqu %%xmm10, %[outbuf6]\n\t"
+ "movdqu %%xmm11, %[outbuf7]\n\t"
+ : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)),
+ [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)),
+ [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE))
+ :
+ : "memory" );
+
+ outbuf += 8*BLOCKSIZE;
+ inbuf += 8*BLOCKSIZE;
+ }
+
+ asm volatile ("pxor %[first_key], %%xmm5\n\t"
+ "pxor %%xmm0, %%xmm0\n\t"
+ "movdqu %%xmm0, %[lxfkey]\n\t"
+ : [lxfkey] "=m" (*lxf_key)
+ : [first_key] "m" (ctx->keyschenc[0][0][0])
+ : "memory" );
+
+ aesni_cleanup_8_15();
+ }
+#endif
+
+ for ( ;nblocks >= 4 ; nblocks -= 4 )
+ {
+ n += 4;
+ l = aes_ocb_get_l(c, n);
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ asm volatile ("movdqu %[l0], %%xmm0\n\t"
+ "movdqu %[inbuf0], %%xmm1\n\t"
+ "movdqu %[l0l1], %%xmm3\n\t"
+ :
+ : [l0] "m" (*c->u_mode.ocb.L[0]),
+ [l0l1] "m" (*c->u_mode.ocb.L0L1),
+ [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l1], %%xmm4\n\t"
+ "movdqu %[l3], %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm7\n\t"
+ "pxor %%xmm0, %%xmm1\n\t"
+ "movdqa %%xmm0, %[tmpbuf0]\n\t"
+ : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE))
+ : [l1] "m" (*c->u_mode.ocb.L[1]),
+ [l3] "m" (*l)
+ : "memory" );
+ asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "pxor %%xmm2, %%xmm7\n\t"
+ "pxor %%xmm3, %%xmm2\n\t"
+ "movdqa %%xmm3, %[tmpbuf1]\n\t"
+ : [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE))
+ : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqa %%xmm4, %%xmm0\n\t"
+ "movdqu %[inbuf2], %%xmm3\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm3, %%xmm7\n\t"
+ "pxor %%xmm0, %%xmm3\n\t"
+ "movdqa %%xmm0, %[tmpbuf2]\n\t"
+ : [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE))
+ :
+ [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("pxor %%xmm6, %%xmm5\n\t"
+ "pxor %%xmm4, %%xmm5\n\t"
+ "movdqu %[inbuf3], %%xmm4\n\t"
+ "pxor %%xmm4, %%xmm7\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ :
+ : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+ : "memory" );
+
+ do_aesni_enc_vec4 (ctx);
+
+ asm volatile ("pxor %[tmpbuf0],%%xmm1\n\t"
+ "movdqu %%xmm1, %[outbuf0]\n\t"
+ "pxor %[tmpbuf1],%%xmm2\n\t"
+ "movdqu %%xmm2, %[outbuf1]\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+ [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+ : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+ [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("pxor %[tmpbuf2],%%xmm3\n\t"
+ "movdqu %%xmm3, %[outbuf2]\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ "movdqu %%xmm4, %[outbuf3]\n\t"
+ : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+ [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+ : [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
+ : "memory" );
+
+ outbuf += 4*BLOCKSIZE;
+ inbuf += 4*BLOCKSIZE;
+ }
+
+ for ( ;nblocks; nblocks-- )
+ {
+ l = aes_ocb_get_l(c, ++n);
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ "pxor %%xmm0, %%xmm7\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ do_aesni_enc (ctx);
+
+ asm volatile ("pxor %%xmm5, %%xmm0\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+
+ c->u_mode.ocb.data_nblocks = n;
+ asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+ "movdqu %%xmm7, %[ctr]\n\t"
+ : [iv] "=m" (*c->u_iv.iv),
+ [ctr] "=m" (*c->u_ctr.ctr)
+ :
+ : "memory" );
+
+ asm volatile ("pxor %%xmm0, %%xmm0\n\t"
+ "movdqa %%xmm0, %[tmpbuf0]\n\t"
+ "movdqa %%xmm0, %[tmpbuf1]\n\t"
+ "movdqa %%xmm0, %[tmpbuf2]\n\t"
+ : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)),
+ [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE)),
+ [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE))
+ :
+ : "memory" );
+
+ aesni_cleanup ();
+ aesni_cleanup_2_7 ();
+
+ return 0;
+}
+
+
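+/* Decrypt NBLOCKS blocks under OCB.  Mirrors aesni_ocb_enc but runs the
+ * inverse cipher with the decryption key schedule; the Checksum is not
+ * accumulated here, it is computed over the plaintext afterwards by
+ * aesni_ocb_checksum.  */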
+static unsigned int ASM_FUNC_ATTR_NOINLINE
+aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks_arg)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ u64 n = c->u_mode.ocb.data_nblocks;
+ const unsigned char *l;
+ size_t nblocks = nblocks_arg;
+ byte tmpbuf_store[3 * 16 + 15];
+ byte *tmpbuf;
+ aesni_prepare_2_7_variable;
+
+ asm volatile ("" : "=r" (tmpbuf) : "0" (tmpbuf_store) : "memory");
+ tmpbuf = tmpbuf + (-(uintptr_t)tmpbuf & 15);
+
+ aesni_prepare ();
+ aesni_prepare_2_7 ();
+
+ if ( !ctx->decryption_prepared )
+ {
+ do_aesni_prepare_decryption ( ctx );
+ ctx->decryption_prepared = 1;
+ }
+
+ /* Preload Offset */
+ asm volatile ("movdqu %[iv], %%xmm5\n\t"
+ : /* No output */
+ : [iv] "m" (*c->u_iv.iv)
+ : "memory" );
+
+ for ( ;nblocks && n % 4; nblocks-- )
+ {
+ l = aes_ocb_get_l(c, ++n);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ do_aesni_dec (ctx);
+
+ asm volatile ("pxor %%xmm5, %%xmm0\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+
+#ifdef __x86_64__
+ if (nblocks >= 8)
+ {
+ unsigned char last_xor_first_key_store[16 + 15];
+ unsigned char *lxf_key;
+ aesni_prepare_8_15_variable;
+
+ asm volatile (""
+ : "=r" (lxf_key)
+ : "0" (last_xor_first_key_store)
+ : "memory");
+ lxf_key = lxf_key + (-(uintptr_t)lxf_key & 15);
+
+ aesni_prepare_8_15();
+
+ asm volatile ("movdqu %[l0], %%xmm6\n\t"
+ "movdqa %[last_key], %%xmm0\n\t"
+ "pxor %[first_key], %%xmm5\n\t"
+ "pxor %[first_key], %%xmm0\n\t"
+ "movdqa %%xmm0, %[lxfkey]\n\t"
+ : [lxfkey] "=m" (*lxf_key)
+ : [l0] "m" (*c->u_mode.ocb.L[0]),
+ [last_key] "m" (ctx->keyschdec[ctx->rounds][0][0]),
+ [first_key] "m" (ctx->keyschdec[0][0][0])
+ : "memory" );
+
+ for ( ;nblocks >= 8 ; nblocks -= 8 )
+ {
+ n += 4;
+ l = aes_ocb_get_l(c, n);
+
+ asm volatile ("movdqu %[l0l1], %%xmm10\n\t"
+ "movdqu %[l1], %%xmm11\n\t"
+ "movdqu %[l3], %%xmm15\n\t"
+ :
+ : [l0l1] "m" (*c->u_mode.ocb.L0L1),
+ [l1] "m" (*c->u_mode.ocb.L[1]),
+ [l3] "m" (*l)
+ : "memory" );
+
+ n += 4;
+ l = aes_ocb_get_l(c, n);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+ asm volatile ("movdqu %[inbuf0], %%xmm1\n\t"
+ "movdqu %[inbuf1], %%xmm2\n\t"
+ "movdqu %[inbuf2], %%xmm3\n\t"
+ :
+ : [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)),
+ [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)),
+ [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[inbuf3], %%xmm4\n\t"
+ "movdqu %[inbuf4], %%xmm8\n\t"
+ "movdqu %[inbuf5], %%xmm9\n\t"
+ :
+ : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)),
+ [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)),
+ [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqa %[lxfkey], %%xmm0\n\t"
+ "movdqa %%xmm6, %%xmm12\n\t"
+ "pxor %%xmm5, %%xmm12\n\t"
+ "pxor %%xmm12, %%xmm1\n\t"
+ "pxor %%xmm0, %%xmm12\n\t"
+
+ "movdqa %%xmm10, %%xmm13\n\t"
+ "pxor %%xmm5, %%xmm13\n\t"
+ "pxor %%xmm13, %%xmm2\n\t"
+ "pxor %%xmm0, %%xmm13\n\t"
+
+ "movdqa %%xmm11, %%xmm14\n\t"
+ "pxor %%xmm5, %%xmm14\n\t"
+ "pxor %%xmm14, %%xmm3\n\t"
+ "pxor %%xmm0, %%xmm14\n\t"
+
+ "pxor %%xmm11, %%xmm5\n\t"
+ "pxor %%xmm15, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ "movdqa %%xmm5, %%xmm15\n\t"
+ "pxor %%xmm0, %%xmm15\n\t"
+
+ "movdqa %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm6, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm8\n\t"
+ "pxor %[lxfkey], %%xmm0\n\t"
+ "movdqa %%xmm0, %[tmpbuf0]\n\t"
+
+ "movdqa %%xmm10, %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm9\n\t"
+ "pxor %[lxfkey], %%xmm0\n"
+ "movdqa %%xmm0, %[tmpbuf1]\n\t"
+ : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)),
+ [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE))
+ : [lxfkey] "m" (*lxf_key)
+ : "memory" );
+ asm volatile ("movdqu %[inbuf6], %%xmm10\n\t"
+ "movdqa %%xmm11, %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm10\n\t"
+ "pxor %[lxfkey], %%xmm0\n\t"
+ "movdqa %%xmm0, %[tmpbuf2]\n\t"
+ : [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE))
+ : [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE)),
+ [lxfkey] "m" (*lxf_key)
+ : "memory" );
+ asm volatile ("movdqu %[l7], %%xmm0\n\t"
+ "pxor %%xmm11, %%xmm5\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "movdqa 0x10(%[key]), %%xmm0\n\t"
+ "movdqu %[inbuf7], %%xmm11\n\t"
+ "pxor %%xmm5, %%xmm11\n\t"
+ :
+ : [l7] "m" (*l),
+ [inbuf7] "m" (*(inbuf + 7 * BLOCKSIZE)),
+ [key] "r" (ctx->keyschdec)
+ : "memory" );
+
+ asm volatile ("cmpl $12, %[rounds]\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x20(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x30(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x40(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x50(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x60(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x70(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x80(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0x90(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "jb .Ldeclast%=\n\t"
+ "movdqa 0xa0(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0xb0(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "je .Ldeclast%=\n\t"
+ "movdqa 0xc0(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+ "movdqa 0xd0(%[key]), %%xmm0\n\t"
+ "aesdec %%xmm0, %%xmm1\n\t"
+ "aesdec %%xmm0, %%xmm2\n\t"
+ "aesdec %%xmm0, %%xmm3\n\t"
+ "aesdec %%xmm0, %%xmm4\n\t"
+ "aesdec %%xmm0, %%xmm8\n\t"
+ "aesdec %%xmm0, %%xmm9\n\t"
+ "aesdec %%xmm0, %%xmm10\n\t"
+ "aesdec %%xmm0, %%xmm11\n\t"
+
+ ".Ldeclast%=:\n\t"
+ :
+ : [key] "r" (ctx->keyschdec),
+ [rounds] "r" (ctx->rounds)
+ : "cc", "memory");
+
+ asm volatile ("aesdeclast %%xmm12, %%xmm1\n\t"
+ "aesdeclast %%xmm13, %%xmm2\n\t"
+ "aesdeclast %%xmm14, %%xmm3\n\t"
+ "aesdeclast %%xmm15, %%xmm4\n\t"
+ "aesdeclast %[tmpbuf0],%%xmm8\n\t"
+ "aesdeclast %[tmpbuf1],%%xmm9\n\t"
+ "aesdeclast %[tmpbuf2],%%xmm10\n\t"
+ :
+ : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+ [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
+ [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("aesdeclast %%xmm5, %%xmm11\n\t"
+ "pxor %[lxfkey], %%xmm11\n\t"
+ "movdqu %%xmm1, %[outbuf0]\n\t"
+ "movdqu %%xmm2, %[outbuf1]\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+ [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+ : [lxfkey] "m" (*lxf_key)
+ : "memory" );
+ asm volatile ("movdqu %%xmm3, %[outbuf2]\n\t"
+ "movdqu %%xmm4, %[outbuf3]\n\t"
+ "movdqu %%xmm8, %[outbuf4]\n\t"
+ : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+ [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)),
+ [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE))
+ :
+ : "memory" );
+ asm volatile ("movdqu %%xmm9, %[outbuf5]\n\t"
+ "movdqu %%xmm10, %[outbuf6]\n\t"
+ "movdqu %%xmm11, %[outbuf7]\n\t"
+ : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)),
+ [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)),
+ [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE))
+ :
+ : "memory" );
+
+ outbuf += 8*BLOCKSIZE;
+ inbuf += 8*BLOCKSIZE;
+ }
+
+ asm volatile ("pxor %[first_key], %%xmm5\n\t"
+ "pxor %%xmm0, %%xmm0\n\t"
+ "movdqu %%xmm0, %[lxfkey]\n\t"
+ : [lxfkey] "=m" (*lxf_key)
+ : [first_key] "m" (ctx->keyschdec[0][0][0])
+ : "memory" );
+
+ aesni_cleanup_8_15();
+ }
+#endif
+
+ for ( ;nblocks >= 4 ; nblocks -= 4 )
+ {
+ n += 4;
+ l = aes_ocb_get_l(c, n);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+ asm volatile ("movdqu %[l0], %%xmm0\n\t"
+ "movdqu %[inbuf0], %%xmm1\n\t"
+ "movdqu %[l0l1], %%xmm3\n\t"
+ :
+ : [l0] "m" (*c->u_mode.ocb.L[0]),
+ [l0l1] "m" (*c->u_mode.ocb.L0L1),
+ [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l1], %%xmm4\n\t"
+ "movdqu %[l3], %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm1\n\t"
+ "movdqa %%xmm0, %[tmpbuf0]\n\t"
+ : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE))
+ : [l1] "m" (*c->u_mode.ocb.L[1]),
+ [l3] "m" (*l)
+ : "memory" );
+ asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "pxor %%xmm3, %%xmm2\n\t"
+ "movdqa %%xmm3, %[tmpbuf1]\n\t"
+ : [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE))
+ : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqa %%xmm4, %%xmm0\n\t"
+ "movdqu %[inbuf2], %%xmm3\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm3\n\t"
+ "movdqa %%xmm0, %[tmpbuf2]\n\t"
+ : [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE))
+ :
+ [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("pxor %%xmm6, %%xmm5\n\t"
+ "pxor %%xmm4, %%xmm5\n\t"
+ "movdqu %[inbuf3], %%xmm4\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ :
+ : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+ : "memory" );
+
+ do_aesni_dec_vec4 (ctx);
+
+ asm volatile ("pxor %[tmpbuf0],%%xmm1\n\t"
+ "movdqu %%xmm1, %[outbuf0]\n\t"
+ "pxor %[tmpbuf1],%%xmm2\n\t"
+ "movdqu %%xmm2, %[outbuf1]\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+ [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+ : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+ [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("pxor %[tmpbuf2],%%xmm3\n\t"
+ "movdqu %%xmm3, %[outbuf2]\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ "movdqu %%xmm4, %[outbuf3]\n\t"
+ : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+ [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+ : [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
+ : "memory" );
+
+ outbuf += 4*BLOCKSIZE;
+ inbuf += 4*BLOCKSIZE;
+ }
+
+ for ( ;nblocks; nblocks-- )
+ {
+ l = aes_ocb_get_l(c, ++n);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ do_aesni_dec (ctx);
+
+ asm volatile ("pxor %%xmm5, %%xmm0\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+
+ c->u_mode.ocb.data_nblocks = n;
+ asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+ : [iv] "=m" (*c->u_iv.iv)
+ :
+ : "memory" );
+
+ asm volatile ("pxor %%xmm0, %%xmm0\n\t"
+ "movdqa %%xmm0, %[tmpbuf0]\n\t"
+ "movdqa %%xmm0, %[tmpbuf1]\n\t"
+ "movdqa %%xmm0, %[tmpbuf2]\n\t"
+ : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)),
+ [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE)),
+ [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE))
+ :
+ : "memory" );
+
+ aesni_ocb_checksum (c, outbuf_arg, nblocks_arg);
+
+ aesni_cleanup ();
+ aesni_cleanup_2_7 ();
+
+ return 0;
+}
+
+
+size_t ASM_FUNC_ATTR
+_gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+ if (encrypt)
+ return aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks);
+ else
+ return aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks);
+}
+
+
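+/* Authenticate NBLOCKS blocks of additional data.  The AAD Offset is kept
+ * in xmm5 and the AAD Sum in xmm6; blocks are absorbed singly until the
+ * counter reaches a multiple of four, then eight and four at a time, and
+ * finally singly again.  */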
+size_t ASM_FUNC_ATTR
+_gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ const unsigned char *abuf = abuf_arg;
+ u64 n = c->u_mode.ocb.aad_nblocks;
+ const unsigned char *l;
+ aesni_prepare_2_7_variable;
+
+ aesni_prepare ();
+ aesni_prepare_2_7 ();
+
+ /* Preload Offset and Sum */
+ asm volatile ("movdqu %[iv], %%xmm5\n\t"
+ "movdqu %[ctr], %%xmm6\n\t"
+ : /* No output */
+ : [iv] "m" (*c->u_mode.ocb.aad_offset),
+ [ctr] "m" (*c->u_mode.ocb.aad_sum)
+ : "memory" );
+
+ for ( ;nblocks && n % 4; nblocks-- )
+ {
+ l = aes_ocb_get_l(c, ++n);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[abuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [abuf] "m" (*abuf)
+ : "memory" );
+
+ do_aesni_enc (ctx);
+
+ asm volatile ("pxor %%xmm0, %%xmm6\n\t"
+ :
+ :
+ : "memory" );
+
+ abuf += BLOCKSIZE;
+ }
+
+#ifdef __x86_64__
+ if (nblocks >= 8)
+ {
+ aesni_prepare_8_15_variable;
+
+ aesni_prepare_8_15();
+
+ asm volatile ("movdqu %[l0], %%xmm7\n\t"
+ "movdqu %[l0l1], %%xmm12\n\t"
+ "movdqu %[l1], %%xmm13\n\t"
+ :
+ : [l0] "m" (*c->u_mode.ocb.L[0]),
+ [l0l1] "m" (*c->u_mode.ocb.L0L1),
+ [l1] "m" (*c->u_mode.ocb.L[1])
+ : "memory" );
+
+ for ( ;nblocks >= 8 ; nblocks -= 8 )
+ {
+ n += 4;
+ l = aes_ocb_get_l(c, n);
+
+ asm volatile ("movdqu %[l3], %%xmm0\n\t"
+ "pxor %%xmm13, %%xmm0\n\t"
+ :
+ : [l3] "m" (*l)
+ : "memory" );
+
+ n += 4;
+ l = aes_ocb_get_l(c, n);
+
+ asm volatile ("movdqu %[l7], %%xmm14\n\t"
+ "pxor %%xmm13, %%xmm14\n\t"
+ :
+ : [l7] "m" (*l)
+ : "memory" );
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ asm volatile ("movdqu %[abuf0], %%xmm1\n\t"
+ "movdqu %[abuf1], %%xmm2\n\t"
+ "movdqu %[abuf2], %%xmm3\n\t"
+ "movdqu %[abuf3], %%xmm4\n\t"
+ :
+ : [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)),
+ [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)),
+ [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)),
+ [abuf3] "m" (*(abuf + 3 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[abuf4], %%xmm8\n\t"
+ "movdqu %[abuf5], %%xmm9\n\t"
+ "movdqu %[abuf6], %%xmm10\n\t"
+ "movdqu %[abuf7], %%xmm11\n\t"
+ :
+ : [abuf4] "m" (*(abuf + 4 * BLOCKSIZE)),
+ [abuf5] "m" (*(abuf + 5 * BLOCKSIZE)),
+ [abuf6] "m" (*(abuf + 6 * BLOCKSIZE)),
+ [abuf7] "m" (*(abuf + 7 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("pxor %%xmm7, %%xmm1\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+
+ "pxor %%xmm12, %%xmm2\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+
+ "pxor %%xmm13, %%xmm3\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+
+ "pxor %%xmm0, %%xmm5\n\t"
+ "movdqa (%[key]), %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+
+ "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */
+ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */
+ "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */
+ "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */
+
+ "pxor %%xmm7, %%xmm8\n\t"
+ "pxor %%xmm5, %%xmm8\n\t"
+
+ "pxor %%xmm12, %%xmm9\n\t"
+ "pxor %%xmm5, %%xmm9\n\t"
+
+ "pxor %%xmm13, %%xmm10\n\t"
+ "pxor %%xmm5, %%xmm10\n\t"
+
+ "pxor %%xmm14, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm11\n\t"
+
+ "pxor %%xmm0, %%xmm8\n\t" /* xmm8 ^= key[0] */
+ "pxor %%xmm0, %%xmm9\n\t" /* xmm9 ^= key[0] */
+ "pxor %%xmm0, %%xmm10\n\t" /* xmm10 ^= key[0] */
+ "pxor %%xmm0, %%xmm11\n\t" /* xmm11 ^= key[0] */
+ :
+ : [key] "r" (ctx->keyschenc)
+ : "memory" );
+
+ do_aesni_enc_vec8 (ctx);
+
+ asm volatile (
+ "aesenclast %%xmm0, %%xmm1\n\t"
+ "aesenclast %%xmm0, %%xmm2\n\t"
+ "aesenclast %%xmm0, %%xmm3\n\t"
+ "aesenclast %%xmm0, %%xmm4\n\t"
+ "aesenclast %%xmm0, %%xmm8\n\t"
+ "aesenclast %%xmm0, %%xmm9\n\t"
+ "aesenclast %%xmm0, %%xmm10\n\t"
+ "aesenclast %%xmm0, %%xmm11\n\t"
+ "pxor %%xmm2, %%xmm1\n\t"
+ "pxor %%xmm3, %%xmm1\n\t"
+ "pxor %%xmm4, %%xmm1\n\t"
+ "pxor %%xmm8, %%xmm1\n\t"
+ "pxor %%xmm9, %%xmm6\n\t"
+ "pxor %%xmm10, %%xmm6\n\t"
+ "pxor %%xmm11, %%xmm6\n\t"
+ "pxor %%xmm1, %%xmm6\n\t"
+ :
+ :
+ : "memory" );
+
+ abuf += 8*BLOCKSIZE;
+ }
+
+ aesni_cleanup_8_15();
+ }
+#endif
+
+ for ( ;nblocks >= 4 ; nblocks -= 4 )
+ {
+ n += 4;
+ l = aes_ocb_get_l(c, n);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ asm volatile ("movdqu %[l0], %%xmm0\n\t"
+ "movdqu %[abuf0], %%xmm1\n\t"
+ "movdqu %[l0l1], %%xmm3\n\t"
+ :
+ : [l0] "m" (*c->u_mode.ocb.L[0]),
+ [l0l1] "m" (*c->u_mode.ocb.L0L1),
+ [abuf0] "m" (*(abuf + 0 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l1], %%xmm4\n\t"
+ "movdqu %[l3], %%xmm7\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm1\n\t"
+ :
+ : [l1] "m" (*c->u_mode.ocb.L[1]),
+ [l3] "m" (*l)
+ : "memory" );
+ asm volatile ("movdqu %[abuf1], %%xmm2\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "pxor %%xmm3, %%xmm2\n\t"
+ :
+ : [abuf1] "m" (*(abuf + 1 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqa %%xmm4, %%xmm0\n\t"
+ "movdqu %[abuf2], %%xmm3\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm3\n\t"
+ :
+ : [abuf2] "m" (*(abuf + 2 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("pxor %%xmm7, %%xmm5\n\t"
+ "pxor %%xmm4, %%xmm5\n\t"
+ "movdqu %[abuf3], %%xmm4\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ :
+ : [abuf3] "m" (*(abuf + 3 * BLOCKSIZE))
+ : "memory" );
+
+ do_aesni_enc_vec4 (ctx);
+
+ asm volatile ("pxor %%xmm1, %%xmm6\n\t"
+ "pxor %%xmm2, %%xmm6\n\t"
+ "pxor %%xmm3, %%xmm6\n\t"
+ "pxor %%xmm4, %%xmm6\n\t"
+ :
+ :
+ : "memory" );
+
+ abuf += 4*BLOCKSIZE;
+ }
+
+ for ( ;nblocks; nblocks-- )
+ {
+ l = aes_ocb_get_l(c, ++n);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[abuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [abuf] "m" (*abuf)
+ : "memory" );
+
+ do_aesni_enc (ctx);
+
+ asm volatile ("pxor %%xmm0, %%xmm6\n\t"
+ :
+ :
+ : "memory" );
+
+ abuf += BLOCKSIZE;
+ }
+
+ c->u_mode.ocb.aad_nblocks = n;
+ asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+ "movdqu %%xmm6, %[ctr]\n\t"
+ : [iv] "=m" (*c->u_mode.ocb.aad_offset),
+ [ctr] "=m" (*c->u_mode.ocb.aad_sum)
+ :
+ : "memory" );
+
+ aesni_cleanup ();
+ aesni_cleanup_2_7 ();
+
+ return 0;
+}
+
+
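+/* Masks used when doubling the XTS tweak.  The pshufd/psrad/pand/pxor
+ * sequences below compute, in effect,
+ *
+ *   carry  = tweak >> 127;
+ *   tweak  = tweak << 1;
+ *   tweak ^= carry ? 0x87 : 0;
+ *
+ * i.e. multiplication of the tweak by x in GF(2^128) with the reduction
+ * polynomial x^128 + x^7 + x^2 + x + 1.  The low quadword of the constant
+ * supplies the 0x87 reduction value, the high quadword the 0x01 that
+ * carries bit 63 into bit 64.  */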
+static const u64 xts_gfmul_const[16] __attribute__ ((aligned (16))) =
+ { 0x87, 0x01 };
+
+
+static void ASM_FUNC_ATTR
+_gcry_aes_aesni_xts_enc (RIJNDAEL_context *ctx, unsigned char *tweak,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ aesni_prepare_2_7_variable;
+
+ aesni_prepare ();
+ aesni_prepare_2_7 ();
+
+ /* Preload Tweak */
+ asm volatile ("movdqu %[tweak], %%xmm5\n\t"
+ "movdqa %[gfmul], %%xmm6\n\t"
+ :
+ : [tweak] "m" (*tweak),
+ [gfmul] "m" (*xts_gfmul_const)
+ : "memory" );
+
+ for ( ;nblocks >= 4; nblocks -= 4 )
+ {
+ asm volatile ("pshufd $0x13, %%xmm5, %%xmm4\n\t"
+ "movdqu %[inbuf0], %%xmm1\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+ "movdqu %%xmm5, %[outbuf0]\n\t"
+
+ "movdqa %%xmm4, %%xmm0\n\t"
+ "paddd %%xmm4, %%xmm4\n\t"
+ "psrad $31, %%xmm0\n\t"
+ "paddq %%xmm5, %%xmm5\n\t"
+ "pand %%xmm6, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * 16))
+ : [inbuf0] "m" (*(inbuf + 0 * 16))
+ : "memory" );
+
+ asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+ "movdqu %%xmm5, %[outbuf1]\n\t"
+
+ "movdqa %%xmm4, %%xmm0\n\t"
+ "paddd %%xmm4, %%xmm4\n\t"
+ "psrad $31, %%xmm0\n\t"
+ "paddq %%xmm5, %%xmm5\n\t"
+ "pand %%xmm6, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ : [outbuf1] "=m" (*(outbuf + 1 * 16))
+ : [inbuf1] "m" (*(inbuf + 1 * 16))
+ : "memory" );
+
+ asm volatile ("movdqu %[inbuf2], %%xmm3\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "movdqu %%xmm5, %[outbuf2]\n\t"
+
+ "movdqa %%xmm4, %%xmm0\n\t"
+ "paddd %%xmm4, %%xmm4\n\t"
+ "psrad $31, %%xmm0\n\t"
+ "paddq %%xmm5, %%xmm5\n\t"
+ "pand %%xmm6, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ : [outbuf2] "=m" (*(outbuf + 2 * 16))
+ : [inbuf2] "m" (*(inbuf + 2 * 16))
+ : "memory" );
+
+ asm volatile ("movdqa %%xmm4, %%xmm0\n\t"
+ "movdqu %[inbuf3], %%xmm4\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ "movdqu %%xmm5, %[outbuf3]\n\t"
+
+ "psrad $31, %%xmm0\n\t"
+ "paddq %%xmm5, %%xmm5\n\t"
+ "pand %%xmm6, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ : [outbuf3] "=m" (*(outbuf + 3 * 16))
+ : [inbuf3] "m" (*(inbuf + 3 * 16))
+ : "memory" );
+
+ do_aesni_enc_vec4 (ctx);
+
+ asm volatile ("movdqu %[outbuf0], %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm1\n\t"
+ "movdqu %[outbuf1], %%xmm0\n\t"
+ "movdqu %%xmm1, %[outbuf0]\n\t"
+ "movdqu %[outbuf2], %%xmm1\n\t"
+ "pxor %%xmm0, %%xmm2\n\t"
+ "movdqu %[outbuf3], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm3\n\t"
+ "pxor %%xmm0, %%xmm4\n\t"
+ "movdqu %%xmm2, %[outbuf1]\n\t"
+ "movdqu %%xmm3, %[outbuf2]\n\t"
+ "movdqu %%xmm4, %[outbuf3]\n\t"
+ : [outbuf0] "+m" (*(outbuf + 0 * 16)),
+ [outbuf1] "+m" (*(outbuf + 1 * 16)),
+ [outbuf2] "+m" (*(outbuf + 2 * 16)),
+ [outbuf3] "+m" (*(outbuf + 3 * 16))
+ :
+ : "memory" );
+
+ outbuf += BLOCKSIZE * 4;
+ inbuf += BLOCKSIZE * 4;
+ }
+
+ for ( ;nblocks; nblocks-- )
+ {
+ asm volatile ("movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "movdqa %%xmm5, %%xmm4\n\t"
+
+ "pshufd $0x13, %%xmm5, %%xmm1\n\t"
+ "psrad $31, %%xmm1\n\t"
+ "paddq %%xmm5, %%xmm5\n\t"
+ "pand %%xmm6, %%xmm1\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ :
+ : [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ do_aesni_enc (ctx);
+
+ asm volatile ("pxor %%xmm4, %%xmm0\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
+ }
+
+ asm volatile ("movdqu %%xmm5, %[tweak]\n\t"
+ : [tweak] "=m" (*tweak)
+ :
+ : "memory" );
+
+ aesni_cleanup ();
+ aesni_cleanup_2_7 ();
+}
+
+
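+/* XTS decryption; uses the same tweak schedule as _gcry_aes_aesni_xts_enc
+ * but runs the inverse cipher, preparing the decryption key schedule on
+ * first use.  */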
+static void ASM_FUNC_ATTR
+_gcry_aes_aesni_xts_dec (RIJNDAEL_context *ctx, unsigned char *tweak,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ aesni_prepare_2_7_variable;
+
+ aesni_prepare ();
+ aesni_prepare_2_7 ();
+
+ if ( !ctx->decryption_prepared )
+ {
+ do_aesni_prepare_decryption ( ctx );
+ ctx->decryption_prepared = 1;
+ }
+
+ /* Preload Tweak */
+ asm volatile ("movdqu %[tweak], %%xmm5\n\t"
+ "movdqa %[gfmul], %%xmm6\n\t"
+ :
+ : [tweak] "m" (*tweak),
+ [gfmul] "m" (*xts_gfmul_const)
+ : "memory" );
+
+ for ( ;nblocks >= 4; nblocks -= 4 )
+ {
+ asm volatile ("pshufd $0x13, %%xmm5, %%xmm4\n\t"
+ "movdqu %[inbuf0], %%xmm1\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+ "movdqu %%xmm5, %[outbuf0]\n\t"
+
+ "movdqa %%xmm4, %%xmm0\n\t"
+ "paddd %%xmm4, %%xmm4\n\t"
+ "psrad $31, %%xmm0\n\t"
+ "paddq %%xmm5, %%xmm5\n\t"
+ "pand %%xmm6, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * 16))
+ : [inbuf0] "m" (*(inbuf + 0 * 16))
+ : "memory" );
+
+ asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+ "movdqu %%xmm5, %[outbuf1]\n\t"
+
+ "movdqa %%xmm4, %%xmm0\n\t"
+ "paddd %%xmm4, %%xmm4\n\t"
+ "psrad $31, %%xmm0\n\t"
+ "paddq %%xmm5, %%xmm5\n\t"
+ "pand %%xmm6, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ : [outbuf1] "=m" (*(outbuf + 1 * 16))
+ : [inbuf1] "m" (*(inbuf + 1 * 16))
+ : "memory" );
+
+ asm volatile ("movdqu %[inbuf2], %%xmm3\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "movdqu %%xmm5, %[outbuf2]\n\t"
+
+ "movdqa %%xmm4, %%xmm0\n\t"
+ "paddd %%xmm4, %%xmm4\n\t"
+ "psrad $31, %%xmm0\n\t"
+ "paddq %%xmm5, %%xmm5\n\t"
+ "pand %%xmm6, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ : [outbuf2] "=m" (*(outbuf + 2 * 16))
+ : [inbuf2] "m" (*(inbuf + 2 * 16))
+ : "memory" );
+
+ asm volatile ("movdqa %%xmm4, %%xmm0\n\t"
+ "movdqu %[inbuf3], %%xmm4\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ "movdqu %%xmm5, %[outbuf3]\n\t"
+
+ "psrad $31, %%xmm0\n\t"
+ "paddq %%xmm5, %%xmm5\n\t"
+ "pand %%xmm6, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ : [outbuf3] "=m" (*(outbuf + 3 * 16))
+ : [inbuf3] "m" (*(inbuf + 3 * 16))
+ : "memory" );
+
+ do_aesni_dec_vec4 (ctx);
+
+ asm volatile ("movdqu %[outbuf0], %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm1\n\t"
+ "movdqu %[outbuf1], %%xmm0\n\t"
+ "movdqu %%xmm1, %[outbuf0]\n\t"
+ "movdqu %[outbuf2], %%xmm1\n\t"
+ "pxor %%xmm0, %%xmm2\n\t"
+ "movdqu %[outbuf3], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm3\n\t"
+ "pxor %%xmm0, %%xmm4\n\t"
+ "movdqu %%xmm2, %[outbuf1]\n\t"
+ "movdqu %%xmm3, %[outbuf2]\n\t"
+ "movdqu %%xmm4, %[outbuf3]\n\t"
+ : [outbuf0] "+m" (*(outbuf + 0 * 16)),
+ [outbuf1] "+m" (*(outbuf + 1 * 16)),
+ [outbuf2] "+m" (*(outbuf + 2 * 16)),
+ [outbuf3] "+m" (*(outbuf + 3 * 16))
+ :
+ : "memory" );
+
+ outbuf += BLOCKSIZE * 4;
+ inbuf += BLOCKSIZE * 4;
+ }
+
+ for ( ;nblocks; nblocks-- )
+ {
+ asm volatile ("movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ "movdqa %%xmm5, %%xmm4\n\t"
+
+ "pshufd $0x13, %%xmm5, %%xmm1\n\t"
+ "psrad $31, %%xmm1\n\t"
+ "paddq %%xmm5, %%xmm5\n\t"
+ "pand %%xmm6, %%xmm1\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ :
+ : [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ do_aesni_dec (ctx);
+
+ asm volatile ("pxor %%xmm4, %%xmm0\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
+ }
+
+ asm volatile ("movdqu %%xmm5, %[tweak]\n\t"
+ : [tweak] "=m" (*tweak)
+ :
+ : "memory" );
+
+ aesni_cleanup ();
+ aesni_cleanup_2_7 ();
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_xts_crypt (RIJNDAEL_context *ctx, unsigned char *tweak,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks, int encrypt)
+{
+ if (encrypt)
+ _gcry_aes_aesni_xts_enc(ctx, tweak, outbuf, inbuf, nblocks);
+ else
+ _gcry_aes_aesni_xts_dec(ctx, tweak, outbuf, inbuf, nblocks);
+}
+
+#if __clang__
+# pragma clang attribute pop
+#endif
+
+#endif /* USE_AESNI */
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-amd64.S b/comm/third_party/libgcrypt/cipher/rijndael-amd64.S
new file mode 100644
index 0000000000..3dcaa856b7
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-amd64.S
@@ -0,0 +1,477 @@
+/* rijndael-amd64.S - AMD64 assembly implementation of AES cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_AES)
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* table macros */
+#define E0 (0)
+#define Es0 (1)
+#define Esize 4
+#define Essize 4
+
+#define D0 (0)
+#define Ds0 (4 * 256)
+#define Dsize 4
+#define Dssize 1
+
+/* register macros */
+#define CTX %rdi
+#define RTAB %r12
+
+#define RA %rax
+#define RB %rbx
+#define RC %rcx
+#define RD %rdx
+
+#define RAd %eax
+#define RBd %ebx
+#define RCd %ecx
+#define RDd %edx
+
+#define RAbl %al
+#define RBbl %bl
+#define RCbl %cl
+#define RDbl %dl
+
+#define RAbh %ah
+#define RBbh %bh
+#define RCbh %ch
+#define RDbh %dh
+
+#define RNA %r8
+#define RNB %r9
+#define RNC %r10
+#define RND %r11
+
+#define RNAd %r8d
+#define RNBd %r9d
+#define RNCd %r10d
+#define RNDd %r11d
+
+#define RT0 %rbp
+#define RT1 %rsi
+
+#define RT0d %ebp
+#define RT1d %esi
+
+/* helper macros */
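+/* The do16bit* macros look up two round-table entries indexed by the low
+ * byte and the second byte of `source' and merge them into dest1/dest2
+ * with `op'; the _shr variants also shift `source' right to expose the
+ * next byte pair, and the last_* variants do byte-wide lookups for the
+ * final round.  */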
+#define do16bit(op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \
+ movzbl source ## bl, t0 ## d; \
+ movzbl source ## bh, t1 ## d; \
+ op ## l table1(RTAB,t0,tablemul), dest1 ## d; \
+ op ## l table2(RTAB,t1,tablemul), dest2 ## d;
+
+#define do16bit_shr(shf, op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \
+ movzbl source ## bl, t0 ## d; \
+ movzbl source ## bh, t1 ## d; \
+ shrl $(shf), source ## d; \
+ op ## l table1(RTAB,t0,tablemul), dest1 ## d; \
+ op ## l table2(RTAB,t1,tablemul), dest2 ## d;
+
+#define last_do16bit(op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \
+ movzbl source ## bl, t0 ## d; \
+ movzbl source ## bh, t1 ## d; \
+ movzbl table1(RTAB,t0,tablemul), t0 ## d; \
+ movzbl table2(RTAB,t1,tablemul), t1 ## d; \
+ op ## l t0 ## d, dest1 ## d; \
+ op ## l t1 ## d, dest2 ## d;
+
+#define last_do16bit_shr(shf, op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \
+ movzbl source ## bl, t0 ## d; \
+ movzbl source ## bh, t1 ## d; \
+ shrl $(shf), source ## d; \
+ movzbl table1(RTAB,t0,tablemul), t0 ## d; \
+ movzbl table2(RTAB,t1,tablemul), t1 ## d; \
+ op ## l t0 ## d, dest1 ## d; \
+ op ## l t1 ## d, dest2 ## d;
+
+/***********************************************************************
+ * AMD64 assembly implementation of the AES cipher
+ ***********************************************************************/
+#define addroundkey(round, ra, rb, rc, rd) \
+ xorl (((round) * 16) + 0 * 4)(CTX), ra ## d; \
+ xorl (((round) * 16) + 1 * 4)(CTX), rb ## d; \
+ xorl (((round) * 16) + 2 * 4)(CTX), rc ## d; \
+ xorl (((round) * 16) + 3 * 4)(CTX), rd ## d;
+
+#define do_encround(next_r) \
+ do16bit_shr(16, mov, RA, Esize, E0, RNA, E0, RND, RT0, RT1); \
+ do16bit( mov, RA, Esize, E0, RNC, E0, RNB, RT0, RT1); \
+ movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \
+ roll $8, RNDd; \
+ xorl RNAd, RAd; \
+ roll $8, RNCd; \
+ roll $8, RNBd; \
+ roll $8, RAd; \
+ \
+ do16bit_shr(16, xor, RD, Esize, E0, RND, E0, RNC, RT0, RT1); \
+ do16bit( xor, RD, Esize, E0, RNB, E0, RA, RT0, RT1); \
+ movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \
+ roll $8, RNCd; \
+ xorl RNDd, RDd; \
+ roll $8, RNBd; \
+ roll $8, RAd; \
+ roll $8, RDd; \
+ \
+ do16bit_shr(16, xor, RC, Esize, E0, RNC, E0, RNB, RT0, RT1); \
+ do16bit( xor, RC, Esize, E0, RA, E0, RD, RT0, RT1); \
+ movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \
+ roll $8, RNBd; \
+ xorl RNCd, RCd; \
+ roll $8, RAd; \
+ roll $8, RDd; \
+ roll $8, RCd; \
+ \
+ do16bit_shr(16, xor, RB, Esize, E0, RNB, E0, RA, RT0, RT1); \
+ do16bit( xor, RB, Esize, E0, RD, E0, RC, RT0, RT1); \
+ movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \
+ roll $8, RAd; \
+ xorl RNBd, RBd; \
+ roll $16, RDd; \
+ roll $24, RCd;
+
+#define do_lastencround(next_r) \
+ do16bit_shr(16, movzb, RA, Essize, Es0, RNA, Es0, RND, RT0, RT1); \
+ do16bit( movzb, RA, Essize, Es0, RNC, Es0, RNB, RT0, RT1); \
+ movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \
+ roll $8, RNDd; \
+ xorl RNAd, RAd; \
+ roll $8, RNCd; \
+ roll $8, RNBd; \
+ roll $8, RAd; \
+ \
+ last_do16bit_shr(16, xor, RD, Essize, Es0, RND, Es0, RNC, RT0, RT1); \
+ last_do16bit( xor, RD, Essize, Es0, RNB, Es0, RA, RT0, RT1); \
+ movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \
+ roll $8, RNCd; \
+ xorl RNDd, RDd; \
+ roll $8, RNBd; \
+ roll $8, RAd; \
+ roll $8, RDd; \
+ \
+ last_do16bit_shr(16, xor, RC, Essize, Es0, RNC, Es0, RNB, RT0, RT1); \
+ last_do16bit( xor, RC, Essize, Es0, RA, Es0, RD, RT0, RT1); \
+ movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \
+ roll $8, RNBd; \
+ xorl RNCd, RCd; \
+ roll $8, RAd; \
+ roll $8, RDd; \
+ roll $8, RCd; \
+ \
+ last_do16bit_shr(16, xor, RB, Essize, Es0, RNB, Es0, RA, RT0, RT1); \
+ last_do16bit( xor, RB, Essize, Es0, RD, Es0, RC, RT0, RT1); \
+ movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \
+ roll $8, RAd; \
+ xorl RNBd, RBd; \
+ roll $16, RDd; \
+ roll $24, RCd;
+
+#define firstencround(round) \
+ addroundkey(round, RA, RB, RC, RD); \
+ do_encround((round) + 1);
+
+#define encround(round) \
+ do_encround((round) + 1);
+
+#define lastencround(round) \
+ do_lastencround((round) + 1);
+
+.align 8
+.globl _gcry_aes_amd64_encrypt_block
+ELF(.type _gcry_aes_amd64_encrypt_block,@function;)
+
+_gcry_aes_amd64_encrypt_block:
+ /* input:
+ * %rdi: keysched, CTX
+ * %rsi: dst
+ * %rdx: src
+	 *	%ecx: number of rounds (10, 12 or 14)
+ * %r8: encryption tables
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_5
+
+ subq $(5 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(5 * 8);
+ movq %rsi, (0 * 8)(%rsp);
+ movl %ecx, (1 * 8)(%rsp);
+ movq %rbp, (2 * 8)(%rsp);
+ movq %rbx, (3 * 8)(%rsp);
+ movq %r12, (4 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 2 * 8);
+ CFI_REL_OFFSET(%rbx, 3 * 8);
+ CFI_REL_OFFSET(%r12, 4 * 8);
+
+ leaq (%r8), RTAB;
+
+ /* read input block */
+ movl 0 * 4(%rdx), RAd;
+ movl 1 * 4(%rdx), RBd;
+ movl 2 * 4(%rdx), RCd;
+ movl 3 * 4(%rdx), RDd;
+
+ firstencround(0);
+ encround(1);
+ encround(2);
+ encround(3);
+ encround(4);
+ encround(5);
+ encround(6);
+ encround(7);
+ encround(8);
+ cmpl $12, (1 * 8)(%rsp);
+ jnb .Lenc_not_128;
+ lastencround(9);
+
+.align 4
+.Lenc_done:
+ /* write output block */
+ movq (0 * 8)(%rsp), %rsi;
+ movl RAd, 0 * 4(%rsi);
+ movl RBd, 1 * 4(%rsi);
+ movl RCd, 2 * 4(%rsi);
+ movl RDd, 3 * 4(%rsi);
+
+ CFI_REMEMBER_STATE();
+
+ movq (4 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %rbx;
+ movq (2 * 8)(%rsp), %rbp;
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%rbp);
+ addq $(5 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-5 * 8);
+
+ movl $(6 * 8), %eax;
+
+ EXIT_SYSV_FUNC
+ ret;
+
+ CFI_RESTORE_STATE();
+.align 4
+.Lenc_not_128:
+ je .Lenc_192
+
+ encround(9);
+ encround(10);
+ encround(11);
+ encround(12);
+ lastencround(13);
+
+ jmp .Lenc_done;
+
+.align 4
+.Lenc_192:
+ encround(9);
+ encround(10);
+ lastencround(11);
+
+ jmp .Lenc_done;
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_amd64_encrypt_block,.-_gcry_aes_amd64_encrypt_block;)
+
+#define do_decround(next_r) \
+ do16bit_shr(16, mov, RA, Dsize, D0, RNA, D0, RNB, RT0, RT1); \
+ do16bit( mov, RA, Dsize, D0, RNC, D0, RND, RT0, RT1); \
+ movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \
+ roll $8, RNBd; \
+ xorl RNAd, RAd; \
+ roll $8, RNCd; \
+ roll $8, RNDd; \
+ roll $8, RAd; \
+ \
+ do16bit_shr(16, xor, RB, Dsize, D0, RNB, D0, RNC, RT0, RT1); \
+ do16bit( xor, RB, Dsize, D0, RND, D0, RA, RT0, RT1); \
+ movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \
+ roll $8, RNCd; \
+ xorl RNBd, RBd; \
+ roll $8, RNDd; \
+ roll $8, RAd; \
+ roll $8, RBd; \
+ \
+ do16bit_shr(16, xor, RC, Dsize, D0, RNC, D0, RND, RT0, RT1); \
+ do16bit( xor, RC, Dsize, D0, RA, D0, RB, RT0, RT1); \
+ movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \
+ roll $8, RNDd; \
+ xorl RNCd, RCd; \
+ roll $8, RAd; \
+ roll $8, RBd; \
+ roll $8, RCd; \
+ \
+ do16bit_shr(16, xor, RD, Dsize, D0, RND, D0, RA, RT0, RT1); \
+ do16bit( xor, RD, Dsize, D0, RB, D0, RC, RT0, RT1); \
+ movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \
+ roll $8, RAd; \
+ xorl RNDd, RDd; \
+ roll $16, RBd; \
+ roll $24, RCd;
+
+#define do_lastdecround(next_r) \
+ do16bit_shr(16, movzb, RA, Dssize, Ds0, RNA, Ds0, RNB, RT0, RT1); \
+ do16bit( movzb, RA, Dssize, Ds0, RNC, Ds0, RND, RT0, RT1); \
+ movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \
+ roll $8, RNBd; \
+ xorl RNAd, RAd; \
+ roll $8, RNCd; \
+ roll $8, RNDd; \
+ roll $8, RAd; \
+ \
+ last_do16bit_shr(16, xor, RB, Dssize, Ds0, RNB, Ds0, RNC, RT0, RT1); \
+ last_do16bit( xor, RB, Dssize, Ds0, RND, Ds0, RA, RT0, RT1); \
+ movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \
+ roll $8, RNCd; \
+ xorl RNBd, RBd; \
+ roll $8, RNDd; \
+ roll $8, RAd; \
+ roll $8, RBd; \
+ \
+ last_do16bit_shr(16, xor, RC, Dssize, Ds0, RNC, Ds0, RND, RT0, RT1); \
+ last_do16bit( xor, RC, Dssize, Ds0, RA, Ds0, RB, RT0, RT1); \
+ movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \
+ roll $8, RNDd; \
+ xorl RNCd, RCd; \
+ roll $8, RAd; \
+ roll $8, RBd; \
+ roll $8, RCd; \
+ \
+ last_do16bit_shr(16, xor, RD, Dssize, Ds0, RND, Ds0, RA, RT0, RT1); \
+ last_do16bit( xor, RD, Dssize, Ds0, RB, Ds0, RC, RT0, RT1); \
+ movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \
+ roll $8, RAd; \
+ xorl RNDd, RDd; \
+ roll $16, RBd; \
+ roll $24, RCd;
+
+#define firstdecround(round) \
+ addroundkey((round + 1), RA, RB, RC, RD); \
+ do_decround(round);
+
+#define decround(round) \
+ do_decround(round);
+
+#define lastdecround(round) \
+ do_lastdecround(round);
+
+.align 8
+.globl _gcry_aes_amd64_decrypt_block
+ELF(.type _gcry_aes_amd64_decrypt_block,@function;)
+
+_gcry_aes_amd64_decrypt_block:
+ /* input:
+ * %rdi: keysched, CTX
+ * %rsi: dst
+ * %rdx: src
+	 *	%ecx: number of rounds (10, 12 or 14)
+ * %r8: decryption tables
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_5
+
+ subq $(5 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(5 * 8);
+ movq %rsi, (0 * 8)(%rsp);
+ movl %ecx, (1 * 8)(%rsp);
+ movq %rbp, (2 * 8)(%rsp);
+ movq %rbx, (3 * 8)(%rsp);
+ movq %r12, (4 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 2 * 8);
+ CFI_REL_OFFSET(%rbx, 3 * 8);
+ CFI_REL_OFFSET(%r12, 4 * 8);
+
+ leaq (%r8), RTAB;
+
+ /* read input block */
+ movl 0 * 4(%rdx), RAd;
+ movl 1 * 4(%rdx), RBd;
+ movl 2 * 4(%rdx), RCd;
+ movl 3 * 4(%rdx), RDd;
+
+ cmpl $12, (1 * 8)(%rsp);
+ jnb .Ldec_256;
+
+ firstdecround(9);
+.align 4
+.Ldec_tail:
+ decround(8);
+ decround(7);
+ decround(6);
+ decround(5);
+ decround(4);
+ decround(3);
+ decround(2);
+ decround(1);
+ lastdecround(0);
+
+ /* write output block */
+ movq (0 * 8)(%rsp), %rsi;
+ movl RAd, 0 * 4(%rsi);
+ movl RBd, 1 * 4(%rsi);
+ movl RCd, 2 * 4(%rsi);
+ movl RDd, 3 * 4(%rsi);
+
+ CFI_REMEMBER_STATE();
+
+ movq (4 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %rbx;
+ movq (2 * 8)(%rsp), %rbp;
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%rbp);
+ addq $(5 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-5 * 8);
+
+ movl $(6 * 8), %eax;
+
+ EXIT_SYSV_FUNC
+ ret;
+
+ CFI_RESTORE_STATE();
+.align 4
+.Ldec_256:
+ je .Ldec_192;
+
+ firstdecround(13);
+ decround(12);
+ decround(11);
+ decround(10);
+ decround(9);
+
+ jmp .Ldec_tail;
+
+.align 4
+.Ldec_192:
+ firstdecround(11);
+ decround(10);
+ decround(9);
+
+ jmp .Ldec_tail;
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_amd64_decrypt_block,.-_gcry_aes_amd64_decrypt_block;)
+
+#endif /*USE_AES*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-arm.S b/comm/third_party/libgcrypt/cipher/rijndael-arm.S
new file mode 100644
index 0000000000..e680c817b2
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-arm.S
@@ -0,0 +1,581 @@
+/* rijndael-arm.S - ARM assembly implementation of AES cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+/* register macros */
+#define CTX %r0
+#define RTAB %lr
+#define RMASK %ip
+
+#define RA %r4
+#define RB %r5
+#define RC %r6
+#define RD %r7
+
+#define RNA %r8
+#define RNB %r9
+#define RNC %r10
+#define RND %r11
+
+#define RT0 %r1
+#define RT1 %r2
+#define RT2 %r3
+
+/* helper macros */
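+/* ldr_unaligned_le/str_unaligned_le assemble or split a 32-bit
+ * little-endian word one byte at a time, for source or destination
+ * addresses that are not 4-byte aligned.  */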
+#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \
+ ldrb rout, [rsrc, #((offs) + 0)]; \
+ ldrb rtmp, [rsrc, #((offs) + 1)]; \
+ orr rout, rout, rtmp, lsl #8; \
+ ldrb rtmp, [rsrc, #((offs) + 2)]; \
+ orr rout, rout, rtmp, lsl #16; \
+ ldrb rtmp, [rsrc, #((offs) + 3)]; \
+ orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \
+ mov rtmp0, rin, lsr #8; \
+ strb rin, [rdst, #((offs) + 0)]; \
+ mov rtmp1, rin, lsr #16; \
+ strb rtmp0, [rdst, #((offs) + 1)]; \
+ mov rtmp0, rin, lsr #24; \
+ strb rtmp1, [rdst, #((offs) + 2)]; \
+ strb rtmp0, [rdst, #((offs) + 3)];
+
+/***********************************************************************
+ * ARM assembly implementation of the AES cipher
+ ***********************************************************************/
+#define preload_first_key(round, ra) \
+ ldr ra, [CTX, #(((round) * 16) + 0 * 4)];
+
+#define dummy(round, ra) /* nothing */
+
+#define addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+ ldm CTX, {rna, rnb, rnc, rnd}; \
+ eor ra, rna; \
+ eor rb, rnb; \
+ eor rc, rnc; \
+ preload_key(1, rna); \
+ eor rd, rnd;
+
+#define do_encround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+ ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \
+ \
+ and RT0, RMASK, ra, lsl#2; \
+ ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \
+ and RT1, RMASK, ra, lsr#(8 - 2); \
+ ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \
+ and RT2, RMASK, ra, lsr#(16 - 2); \
+ ldr RT0, [RTAB, RT0]; \
+ and ra, RMASK, ra, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, RT1]; \
+ eor rna, rna, RT0; \
+ ldr RT2, [RTAB, RT2]; \
+ and RT0, RMASK, rd, lsl#2; \
+ ldr ra, [RTAB, ra]; \
+ \
+ eor rnd, rnd, RT1, ror #24; \
+ and RT1, RMASK, rd, lsr#(8 - 2); \
+ eor rnc, rnc, RT2, ror #16; \
+ and RT2, RMASK, rd, lsr#(16 - 2); \
+ eor rnb, rnb, ra, ror #8; \
+ ldr RT0, [RTAB, RT0]; \
+ and rd, RMASK, rd, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, RT1]; \
+ eor rnd, rnd, RT0; \
+ ldr RT2, [RTAB, RT2]; \
+ and RT0, RMASK, rc, lsl#2; \
+ ldr rd, [RTAB, rd]; \
+ \
+ eor rnc, rnc, RT1, ror #24; \
+ and RT1, RMASK, rc, lsr#(8 - 2); \
+ eor rnb, rnb, RT2, ror #16; \
+ and RT2, RMASK, rc, lsr#(16 - 2); \
+ eor rna, rna, rd, ror #8; \
+ ldr RT0, [RTAB, RT0]; \
+ and rc, RMASK, rc, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, RT1]; \
+ eor rnc, rnc, RT0; \
+ ldr RT2, [RTAB, RT2]; \
+ and RT0, RMASK, rb, lsl#2; \
+ ldr rc, [RTAB, rc]; \
+ \
+ eor rnb, rnb, RT1, ror #24; \
+ and RT1, RMASK, rb, lsr#(8 - 2); \
+ eor rna, rna, RT2, ror #16; \
+ and RT2, RMASK, rb, lsr#(16 - 2); \
+ eor rnd, rnd, rc, ror #8; \
+ ldr RT0, [RTAB, RT0]; \
+ and rb, RMASK, rb, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, RT1]; \
+ eor rnb, rnb, RT0; \
+ ldr RT2, [RTAB, RT2]; \
+ eor rna, rna, RT1, ror #24; \
+ ldr rb, [RTAB, rb]; \
+ \
+ eor rnd, rnd, RT2, ror #16; \
+ preload_key((next_r) + 1, ra); \
+ eor rnc, rnc, rb, ror #8;
+
+#define do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ and RT0, RMASK, ra, lsl#2; \
+ and RT1, RMASK, ra, lsr#(8 - 2); \
+ and RT2, RMASK, ra, lsr#(16 - 2); \
+ ldrb rna, [RTAB, RT0]; \
+ and ra, RMASK, ra, lsr#(24 - 2); \
+ ldrb rnd, [RTAB, RT1]; \
+ and RT0, RMASK, rd, lsl#2; \
+ ldrb rnc, [RTAB, RT2]; \
+ mov rnd, rnd, ror #24; \
+ ldrb rnb, [RTAB, ra]; \
+ and RT1, RMASK, rd, lsr#(8 - 2); \
+ mov rnc, rnc, ror #16; \
+ and RT2, RMASK, rd, lsr#(16 - 2); \
+ mov rnb, rnb, ror #8; \
+ ldrb RT0, [RTAB, RT0]; \
+ and rd, RMASK, rd, lsr#(24 - 2); \
+ ldrb RT1, [RTAB, RT1]; \
+ \
+ orr rnd, rnd, RT0; \
+ ldrb RT2, [RTAB, RT2]; \
+ and RT0, RMASK, rc, lsl#2; \
+ ldrb rd, [RTAB, rd]; \
+ orr rnc, rnc, RT1, ror #24; \
+ and RT1, RMASK, rc, lsr#(8 - 2); \
+ orr rnb, rnb, RT2, ror #16; \
+ and RT2, RMASK, rc, lsr#(16 - 2); \
+ orr rna, rna, rd, ror #8; \
+ ldrb RT0, [RTAB, RT0]; \
+ and rc, RMASK, rc, lsr#(24 - 2); \
+ ldrb RT1, [RTAB, RT1]; \
+ \
+ orr rnc, rnc, RT0; \
+ ldrb RT2, [RTAB, RT2]; \
+ and RT0, RMASK, rb, lsl#2; \
+ ldrb rc, [RTAB, rc]; \
+ orr rnb, rnb, RT1, ror #24; \
+ and RT1, RMASK, rb, lsr#(8 - 2); \
+ orr rna, rna, RT2, ror #16; \
+ ldrb RT0, [RTAB, RT0]; \
+ and RT2, RMASK, rb, lsr#(16 - 2); \
+ ldrb RT1, [RTAB, RT1]; \
+ orr rnd, rnd, rc, ror #8; \
+ ldrb RT2, [RTAB, RT2]; \
+ and rb, RMASK, rb, lsr#(24 - 2); \
+ ldrb rb, [RTAB, rb]; \
+ \
+ orr rnb, rnb, RT0; \
+ orr rna, rna, RT1, ror #24; \
+ orr rnd, rnd, RT2, ror #16; \
+ orr rnc, rnc, rb, ror #8;
+
+#define firstencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); \
+ do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key);
+
+#define encround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+ do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key);
+
+#define lastencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ add CTX, #(((round) + 1) * 16); \
+ add RTAB, #1; \
+ do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \
+ addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy);
+
+.align 3
+.globl _gcry_aes_arm_encrypt_block
+.type _gcry_aes_arm_encrypt_block,%function;
+
+_gcry_aes_arm_encrypt_block:
+ /* input:
+ * %r0: keysched, CTX
+ * %r1: dst
+ * %r2: src
+ * %r3: number of rounds.. 10, 12 or 14
+ * %st+0: encryption table
+ */
+ push {%r4-%r11, %ip, %lr};
+
+ /* read input block */
+
+ /* test if src is unaligned */
+ tst %r2, #3;
+ beq 1f;
+
+ /* unaligned load */
+ ldr_unaligned_le(RA, %r2, 0, RNA);
+ ldr_unaligned_le(RB, %r2, 4, RNB);
+ ldr_unaligned_le(RC, %r2, 8, RNA);
+ ldr_unaligned_le(RD, %r2, 12, RNB);
+ b 2f;
+.ltorg
+1:
+ /* aligned load */
+ ldm %r2, {RA, RB, RC, RD};
+#ifndef __ARMEL__
+ rev RA, RA;
+ rev RB, RB;
+ rev RC, RC;
+ rev RD, RD;
+#endif
+2:
+ ldr RTAB, [%sp, #40];
+ sub %sp, #16;
+
+ str %r1, [%sp, #4]; /* dst */
+ mov RMASK, #0xff;
+ str %r3, [%sp, #8]; /* nrounds */
+ mov RMASK, RMASK, lsl#2; /* byte mask */
+
+ firstencround(0, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+ encround(1, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ encround(2, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ encround(3, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ encround(4, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ encround(5, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ encround(6, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ encround(7, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+
+ ldr RT0, [%sp, #8]; /* nrounds */
+ cmp RT0, #12;
+ bge .Lenc_not_128;
+
+ encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy);
+ lastencround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+.Lenc_done:
+ ldr RT0, [%sp, #4]; /* dst */
+ add %sp, #16;
+
+ /* store output block */
+
+ /* test if dst is unaligned */
+ tst RT0, #3;
+ beq 1f;
+
+ /* unaligned store */
+ str_unaligned_le(RA, RT0, 0, RNA, RNB);
+ str_unaligned_le(RB, RT0, 4, RNA, RNB);
+ str_unaligned_le(RC, RT0, 8, RNA, RNB);
+ str_unaligned_le(RD, RT0, 12, RNA, RNB);
+ b 2f;
+.ltorg
+1:
+ /* aligned store */
+#ifndef __ARMEL__
+ rev RA, RA;
+ rev RB, RB;
+ rev RC, RC;
+ rev RD, RD;
+#endif
+ /* write output block */
+ stm RT0, {RA, RB, RC, RD};
+2:
+
+ mov r0, #(10 * 4);
+ pop {%r4-%r11, %ip, %pc};
+
+.ltorg
+.Lenc_not_128:
+ beq .Lenc_192
+
+ encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ encround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ encround(12, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy);
+ lastencround(13, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+ b .Lenc_done;
+
+.ltorg
+.Lenc_192:
+ encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy);
+ lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+ b .Lenc_done;
+.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;
+
+#define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \
+ ldr rnb, [CTX, #(((round) * 16) + 1 * 4)]; \
+ eor ra, rna; \
+ ldr rnc, [CTX, #(((round) * 16) + 2 * 4)]; \
+ eor rb, rnb; \
+ ldr rnd, [CTX, #(((round) * 16) + 3 * 4)]; \
+ eor rc, rnc; \
+ preload_first_key((round) - 1, rna); \
+ eor rd, rnd;
+
+#define do_decround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+ ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \
+ \
+ and RT0, RMASK, ra, lsl#2; \
+ ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \
+ and RT1, RMASK, ra, lsr#(8 - 2); \
+ ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \
+ and RT2, RMASK, ra, lsr#(16 - 2); \
+ ldr RT0, [RTAB, RT0]; \
+ and ra, RMASK, ra, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, RT1]; \
+ eor rna, rna, RT0; \
+ ldr RT2, [RTAB, RT2]; \
+ and RT0, RMASK, rb, lsl#2; \
+ ldr ra, [RTAB, ra]; \
+ \
+ eor rnb, rnb, RT1, ror #24; \
+ and RT1, RMASK, rb, lsr#(8 - 2); \
+ eor rnc, rnc, RT2, ror #16; \
+ and RT2, RMASK, rb, lsr#(16 - 2); \
+ eor rnd, rnd, ra, ror #8; \
+ ldr RT0, [RTAB, RT0]; \
+ and rb, RMASK, rb, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, RT1]; \
+ eor rnb, rnb, RT0; \
+ ldr RT2, [RTAB, RT2]; \
+ and RT0, RMASK, rc, lsl#2; \
+ ldr rb, [RTAB, rb]; \
+ \
+ eor rnc, rnc, RT1, ror #24; \
+ and RT1, RMASK, rc, lsr#(8 - 2); \
+ eor rnd, rnd, RT2, ror #16; \
+ and RT2, RMASK, rc, lsr#(16 - 2); \
+ eor rna, rna, rb, ror #8; \
+ ldr RT0, [RTAB, RT0]; \
+ and rc, RMASK, rc, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, RT1]; \
+ eor rnc, rnc, RT0; \
+ ldr RT2, [RTAB, RT2]; \
+ and RT0, RMASK, rd, lsl#2; \
+ ldr rc, [RTAB, rc]; \
+ \
+ eor rnd, rnd, RT1, ror #24; \
+ and RT1, RMASK, rd, lsr#(8 - 2); \
+ eor rna, rna, RT2, ror #16; \
+ and RT2, RMASK, rd, lsr#(16 - 2); \
+ eor rnb, rnb, rc, ror #8; \
+ ldr RT0, [RTAB, RT0]; \
+ and rd, RMASK, rd, lsr#(24 - 2); \
+ \
+ ldr RT1, [RTAB, RT1]; \
+ eor rnd, rnd, RT0; \
+ ldr RT2, [RTAB, RT2]; \
+ eor rna, rna, RT1, ror #24; \
+ ldr rd, [RTAB, rd]; \
+ \
+ eor rnb, rnb, RT2, ror #16; \
+ preload_key((next_r) - 1, ra); \
+ eor rnc, rnc, rd, ror #8;
+
+#define do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ and RT0, RMASK, ra; \
+ and RT1, RMASK, ra, lsr#8; \
+ and RT2, RMASK, ra, lsr#16; \
+ ldrb rna, [RTAB, RT0]; \
+ mov ra, ra, lsr#24; \
+ ldrb rnb, [RTAB, RT1]; \
+ and RT0, RMASK, rb; \
+ ldrb rnc, [RTAB, RT2]; \
+ mov rnb, rnb, ror #24; \
+ ldrb rnd, [RTAB, ra]; \
+ and RT1, RMASK, rb, lsr#8; \
+ mov rnc, rnc, ror #16; \
+ and RT2, RMASK, rb, lsr#16; \
+ mov rnd, rnd, ror #8; \
+ ldrb RT0, [RTAB, RT0]; \
+ mov rb, rb, lsr#24; \
+ ldrb RT1, [RTAB, RT1]; \
+ \
+ orr rnb, rnb, RT0; \
+ ldrb RT2, [RTAB, RT2]; \
+ and RT0, RMASK, rc; \
+ ldrb rb, [RTAB, rb]; \
+ orr rnc, rnc, RT1, ror #24; \
+ and RT1, RMASK, rc, lsr#8; \
+ orr rnd, rnd, RT2, ror #16; \
+ and RT2, RMASK, rc, lsr#16; \
+ orr rna, rna, rb, ror #8; \
+ ldrb RT0, [RTAB, RT0]; \
+ mov rc, rc, lsr#24; \
+ ldrb RT1, [RTAB, RT1]; \
+ \
+ orr rnc, rnc, RT0; \
+ ldrb RT2, [RTAB, RT2]; \
+ and RT0, RMASK, rd; \
+ ldrb rc, [RTAB, rc]; \
+ orr rnd, rnd, RT1, ror #24; \
+ and RT1, RMASK, rd, lsr#8; \
+ orr rna, rna, RT2, ror #16; \
+ ldrb RT0, [RTAB, RT0]; \
+ and RT2, RMASK, rd, lsr#16; \
+ ldrb RT1, [RTAB, RT1]; \
+ orr rnb, rnb, rc, ror #8; \
+ ldrb RT2, [RTAB, RT2]; \
+ mov rd, rd, lsr#24; \
+ ldrb rd, [RTAB, rd]; \
+ \
+ orr rnd, rnd, RT0; \
+ orr rna, rna, RT1, ror #24; \
+ orr rnb, rnb, RT2, ror #16; \
+ orr rnc, rnc, rd, ror #8;
+
+#define firstdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ addroundkey_dec(((round) + 1), ra, rb, rc, rd, rna, rnb, rnc, rnd); \
+ do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key);
+
+#define decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+ do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key);
+
+#define set_last_round_rmask(_, __) \
+ mov RMASK, #0xff;
+
+#define lastdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+ add RTAB, #(4 * 256); \
+ do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \
+ addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy);
+
+.align 3
+.globl _gcry_aes_arm_decrypt_block
+.type _gcry_aes_arm_decrypt_block,%function;
+
+_gcry_aes_arm_decrypt_block:
+ /* input:
+ * %r0: keysched, CTX
+ * %r1: dst
+ * %r2: src
+ * %r3: number of rounds.. 10, 12 or 14
+ * %st+0: decryption table
+ */
+ push {%r4-%r11, %ip, %lr};
+
+ /* read input block */
+
+ /* test if src is unaligned */
+ tst %r2, #3;
+ beq 1f;
+
+ /* unaligned load */
+ ldr_unaligned_le(RA, %r2, 0, RNA);
+ ldr_unaligned_le(RB, %r2, 4, RNB);
+ ldr_unaligned_le(RC, %r2, 8, RNA);
+ ldr_unaligned_le(RD, %r2, 12, RNB);
+ b 2f;
+.ltorg
+1:
+ /* aligned load */
+ ldm %r2, {RA, RB, RC, RD};
+#ifndef __ARMEL__
+ rev RA, RA;
+ rev RB, RB;
+ rev RC, RC;
+ rev RD, RD;
+#endif
+2:
+ ldr RTAB, [%sp, #40];
+ sub %sp, #16;
+
+ mov RMASK, #0xff;
+ str %r1, [%sp, #4]; /* dst */
+ mov RMASK, RMASK, lsl#2; /* byte mask */
+
+ cmp %r3, #12;
+ bge .Ldec_256;
+
+ firstdecround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+.Ldec_tail:
+ decround(8, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(7, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ decround(6, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(5, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ decround(4, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(3, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ decround(2, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(1, RA, RB, RC, RD, RNA, RNB, RNC, RND, set_last_round_rmask);
+ lastdecround(0, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+ ldr RT0, [%sp, #4]; /* dst */
+ add %sp, #16;
+
+ /* store output block */
+
+ /* test if dst is unaligned */
+ tst RT0, #3;
+ beq 1f;
+
+ /* unaligned store */
+ str_unaligned_le(RA, RT0, 0, RNA, RNB);
+ str_unaligned_le(RB, RT0, 4, RNA, RNB);
+ str_unaligned_le(RC, RT0, 8, RNA, RNB);
+ str_unaligned_le(RD, RT0, 12, RNA, RNB);
+ b 2f;
+.ltorg
+1:
+ /* aligned store */
+#ifndef __ARMEL__
+ rev RA, RA;
+ rev RB, RB;
+ rev RC, RC;
+ rev RD, RD;
+#endif
+ /* write output block */
+ stm RT0, {RA, RB, RC, RD};
+2:
+ mov r0, #(10 * 4);
+ pop {%r4-%r11, %ip, %pc};
+
+.ltorg
+.Ldec_256:
+ beq .Ldec_192;
+
+ firstdecround(13, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+ decround(12, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+ decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+
+ b .Ldec_tail;
+
+.ltorg
+.Ldec_192:
+ firstdecround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+ decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+ decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+
+ b .Ldec_tail;
+.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block;
+
+#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/
+#endif /*__ARMEL__ */
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-armv8-aarch32-ce.S b/comm/third_party/libgcrypt/cipher/rijndael-armv8-aarch32-ce.S
new file mode 100644
index 0000000000..66440bd4eb
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-armv8-aarch32-ce.S
@@ -0,0 +1,1867 @@
+/* rijndael-armv8-aarch32-ce.S - ARMv8/CE accelerated AES
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+
+.syntax unified
+.arch armv8-a
+.fpu crypto-neon-fp-armv8
+.arm
+
+.text
+
+#ifdef __PIC__
+# define GET_DATA_POINTER(reg, name, rtmp) \
+ ldr reg, 1f; \
+ ldr rtmp, 2f; \
+ b 3f; \
+ 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+ 2: .word name(GOT); \
+ 3: add reg, pc, reg; \
+ ldr reg, [reg, rtmp];
+#else
+# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+
+/* AES macros */
+
+#define aes_preload_keys(keysched, rekeysched) \
+ vldmia keysched!, {q5-q7}; \
+ mov rekeysched, keysched; \
+ vldmialo keysched!, {q8-q15}; /* 128-bit */ \
+ addeq keysched, #(2*16); \
+ vldmiaeq keysched!, {q10-q15}; /* 192-bit */ \
+ addhi keysched, #(4*16); \
+ vldmiahi keysched!, {q12-q15}; /* 256-bit */ \
+
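[Editor's note] The lo/eq/hi suffixes above are conditional on the caller's `cmp nrounds, #12`, so the macro loads 11, 13 or 15 round keys. As a size reminder (a hedged helper, not code from this file):

    #include <stddef.h>

    /* AES-128/192/256 use 10/12/14 rounds; the schedule that keysched
     * points at holds nrounds + 1 round keys of 16 bytes each. */
    static size_t
    keysched_size (unsigned int nrounds)
    {
      return ((size_t)nrounds + 1) * 16;   /* 176, 208 or 240 bytes */
    }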
+#define do_aes_one128(ed, mcimc, qo, qb) \
+ aes##ed.8 qb, q5; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q6; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q7; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q8; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q9; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q10; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q11; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q12; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q13; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q14; \
+ veor qo, qb, q15;
+
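[Editor's note] do_aes_one128 is the Crypto Extensions form of a full AES-128 block: AESE folds AddRoundKey, SubBytes and ShiftRows into one instruction, AESMC is MixColumns, and the final round drops MixColumns so the last round key is applied with a plain veor. A rough intrinsics equivalent, assuming the eleven round keys are already in an array (names are illustrative, not from libgcrypt):

    #include <arm_neon.h>   /* build with the ARMv8 crypto extension enabled */

    static uint8x16_t
    aes128_encrypt_block_ce (uint8x16_t block, const uint8x16_t rk[11])
    {
      int i;

      for (i = 0; i < 9; i++)
        {
          block = vaeseq_u8 (block, rk[i]);  /* AddRoundKey+SubBytes+ShiftRows */
          block = vaesmcq_u8 (block);        /* MixColumns */
        }
      block = vaeseq_u8 (block, rk[9]);      /* last round: no MixColumns */
      return veorq_u8 (block, rk[10]);       /* final AddRoundKey */
    }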
+#define do_aes_one128re(ed, mcimc, qo, qb, keysched, rekeysched) \
+ vldm rekeysched, {q8-q9}; \
+ do_aes_one128(ed, mcimc, qo, qb);
+
+#define do_aes_one192(ed, mcimc, qo, qb, keysched, rekeysched) \
+ vldm rekeysched!, {q8}; \
+ aes##ed.8 qb, q5; \
+ aes##mcimc.8 qb, qb; \
+ vldm rekeysched, {q9}; \
+ aes##ed.8 qb, q6; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q7; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q8; \
+ aes##mcimc.8 qb, qb; \
+ vldmia keysched!, {q8}; \
+ aes##ed.8 qb, q9; \
+ aes##mcimc.8 qb, qb; \
+ sub rekeysched, #(1*16); \
+ aes##ed.8 qb, q10; \
+ aes##mcimc.8 qb, qb; \
+ vldm keysched, {q9}; \
+ aes##ed.8 qb, q11; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q12; \
+ aes##mcimc.8 qb, qb; \
+ sub keysched, #16; \
+ aes##ed.8 qb, q13; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q14; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q15; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q8; \
+ veor qo, qb, q9; \
+
+#define do_aes_one256(ed, mcimc, qo, qb, keysched, rekeysched) \
+ vldmia rekeysched!, {q8}; \
+ aes##ed.8 qb, q5; \
+ aes##mcimc.8 qb, qb; \
+ vldmia rekeysched!, {q9}; \
+ aes##ed.8 qb, q6; \
+ aes##mcimc.8 qb, qb; \
+ vldmia rekeysched!, {q10}; \
+ aes##ed.8 qb, q7; \
+ aes##mcimc.8 qb, qb; \
+ vldm rekeysched, {q11}; \
+ aes##ed.8 qb, q8; \
+ aes##mcimc.8 qb, qb; \
+ vldmia keysched!, {q8}; \
+ aes##ed.8 qb, q9; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q10; \
+ aes##mcimc.8 qb, qb; \
+ vldmia keysched!, {q9}; \
+ aes##ed.8 qb, q11; \
+ aes##mcimc.8 qb, qb; \
+ sub rekeysched, #(3*16); \
+ aes##ed.8 qb, q12; \
+ aes##mcimc.8 qb, qb; \
+ vldmia keysched!, {q10}; \
+ aes##ed.8 qb, q13; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q14; \
+ aes##mcimc.8 qb, qb; \
+ vldm keysched, {q11}; \
+ aes##ed.8 qb, q15; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q8; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q9; \
+ aes##mcimc.8 qb, qb; \
+ aes##ed.8 qb, q10; \
+ veor qo, qb, q11; \
+ sub keysched, #(3*16); \
+
+#define aes_round_4(ed, mcimc, b0, b1, b2, b3, key) \
+ aes##ed.8 b0, key; \
+ aes##mcimc.8 b0, b0; \
+ aes##ed.8 b1, key; \
+ aes##mcimc.8 b1, b1; \
+ aes##ed.8 b2, key; \
+ aes##mcimc.8 b2, b2; \
+ aes##ed.8 b3, key; \
+ aes##mcimc.8 b3, b3;
+
+#define do_aes_4_128(ed, mcimc, b0, b1, b2, b3) \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q5); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q6); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q7); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q8); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q9); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q10); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q11); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q12); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q13); \
+ aes##ed.8 b0, q14; \
+ veor b0, b0, q15; \
+ aes##ed.8 b1, q14; \
+ veor b1, b1, q15; \
+ aes##ed.8 b2, q14; \
+ veor b2, b2, q15; \
+ aes##ed.8 b3, q14; \
+ veor b3, b3, q15;
+
+#define do_aes_4_128re(ed, mcimc, b0, b1, b2, b3, keysched, rekeysched) \
+ vldm rekeysched, {q8-q9}; \
+ do_aes_4_128(ed, mcimc, b0, b1, b2, b3);
+
+#define do_aes_4_192(ed, mcimc, b0, b1, b2, b3, keysched, rekeysched) \
+ vldm rekeysched!, {q8}; \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q5); \
+ vldm rekeysched, {q9}; \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q6); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q7); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q8); \
+ vldmia keysched!, {q8}; \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q9); \
+ sub rekeysched, #(1*16); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q10); \
+ vldm keysched, {q9}; \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q11); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q12); \
+ sub keysched, #16; \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q13); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q14); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q15); \
+ aes##ed.8 b0, q8; \
+ veor b0, b0, q9; \
+ aes##ed.8 b1, q8; \
+ veor b1, b1, q9; \
+ aes##ed.8 b2, q8; \
+ veor b2, b2, q9; \
+ aes##ed.8 b3, q8; \
+ veor b3, b3, q9;
+
+#define do_aes_4_256(ed, mcimc, b0, b1, b2, b3, keysched, rekeysched) \
+ vldmia rekeysched!, {q8}; \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q5); \
+ vldmia rekeysched!, {q9}; \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q6); \
+ vldmia rekeysched!, {q10}; \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q7); \
+ vldm rekeysched, {q11}; \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q8); \
+ vldmia keysched!, {q8}; \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q9); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q10); \
+ vldmia keysched!, {q9}; \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q11); \
+ sub rekeysched, #(3*16); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q12); \
+ vldmia keysched!, {q10}; \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q13); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q14); \
+ vldm keysched, {q11}; \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q15); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q8); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, q9); \
+ sub keysched, #(3*16); \
+ aes##ed.8 b0, q10; \
+ veor b0, b0, q11; \
+ aes##ed.8 b1, q10; \
+ veor b1, b1, q11; \
+ aes##ed.8 b2, q10; \
+ veor b2, b2, q11; \
+ aes##ed.8 b3, q10; \
+ veor b3, b3, q11;
+
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) veor reg, reg;
+
+
+/*
+ * unsigned int _gcry_aes_enc_armv8_ce(void *keysched, byte *dst,
+ * const byte *src,
+ * unsigned int nrounds);
+ */
+.align 3
+.globl _gcry_aes_enc_armv8_ce
+.type _gcry_aes_enc_armv8_ce,%function;
+_gcry_aes_enc_armv8_ce:
+ /* input:
+ * r0: keysched
+ * r1: dst
+ * r2: src
+ * r3: nrounds
+ */
+
+ vldmia r0!, {q1-q3} /* load 3 round keys */
+
+ cmp r3, #12
+
+ vld1.8 {q0}, [r2]
+
+ bhi .Lenc1_256
+ beq .Lenc1_192
+
+.Lenc1_128:
+
+.Lenc1_tail:
+ vldmia r0, {q8-q15} /* load 8 round keys */
+
+ aese.8 q0, q1
+ aesmc.8 q0, q0
+ CLEAR_REG(q1)
+
+ aese.8 q0, q2
+ aesmc.8 q0, q0
+ CLEAR_REG(q2)
+
+ aese.8 q0, q3
+ aesmc.8 q0, q0
+ CLEAR_REG(q3)
+
+ aese.8 q0, q8
+ aesmc.8 q0, q0
+ CLEAR_REG(q8)
+
+ aese.8 q0, q9
+ aesmc.8 q0, q0
+ CLEAR_REG(q9)
+
+ aese.8 q0, q10
+ aesmc.8 q0, q0
+ CLEAR_REG(q10)
+
+ aese.8 q0, q11
+ aesmc.8 q0, q0
+ CLEAR_REG(q11)
+
+ aese.8 q0, q12
+ aesmc.8 q0, q0
+ CLEAR_REG(q12)
+
+ aese.8 q0, q13
+ aesmc.8 q0, q0
+ CLEAR_REG(q13)
+
+ aese.8 q0, q14
+ veor q0, q15
+ CLEAR_REG(q14)
+ CLEAR_REG(q15)
+
+ vst1.8 {q0}, [r1]
+ CLEAR_REG(q0)
+
+ mov r0, #0
+ bx lr
+
+.Lenc1_192:
+ aese.8 q0, q1
+ aesmc.8 q0, q0
+ vmov q1, q3
+
+ aese.8 q0, q2
+ aesmc.8 q0, q0
+ vldm r0!, {q2-q3} /* load 3 round keys */
+
+ b .Lenc1_tail
+
+.Lenc1_256:
+ vldm r0!, {q15} /* load 1 round key */
+ aese.8 q0, q1
+ aesmc.8 q0, q0
+
+ aese.8 q0, q2
+ aesmc.8 q0, q0
+
+ aese.8 q0, q3
+ aesmc.8 q0, q0
+ vldm r0!, {q1-q3} /* load 3 round keys */
+
+ aese.8 q0, q15
+ aesmc.8 q0, q0
+
+ b .Lenc1_tail
+.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce;
+
+
+/*
+ * unsigned int _gcry_aes_dec_armv8_ce(void *keysched, byte *dst,
+ * const byte *src,
+ * unsigned int nrounds);
+ */
+.align 3
+.globl _gcry_aes_dec_armv8_ce
+.type _gcry_aes_dec_armv8_ce,%function;
+_gcry_aes_dec_armv8_ce:
+ /* input:
+ * r0: keysched
+ * r1: dst
+ * r2: src
+ * r3: nrounds
+ */
+
+ vldmia r0!, {q1-q3} /* load 3 round keys */
+
+ cmp r3, #12
+
+ vld1.8 {q0}, [r2]
+
+ bhi .Ldec1_256
+ beq .Ldec1_192
+
+.Ldec1_128:
+
+.Ldec1_tail:
+ vldmia r0, {q8-q15} /* load 8 round keys */
+
+ aesd.8 q0, q1
+ aesimc.8 q0, q0
+ CLEAR_REG(q1)
+
+ aesd.8 q0, q2
+ aesimc.8 q0, q0
+ CLEAR_REG(q2)
+
+ aesd.8 q0, q3
+ aesimc.8 q0, q0
+ CLEAR_REG(q3)
+
+ aesd.8 q0, q8
+ aesimc.8 q0, q0
+ CLEAR_REG(q8)
+
+ aesd.8 q0, q9
+ aesimc.8 q0, q0
+ CLEAR_REG(q9)
+
+ aesd.8 q0, q10
+ aesimc.8 q0, q0
+ CLEAR_REG(q10)
+
+ aesd.8 q0, q11
+ aesimc.8 q0, q0
+ CLEAR_REG(q11)
+
+ aesd.8 q0, q12
+ aesimc.8 q0, q0
+ CLEAR_REG(q12)
+
+ aesd.8 q0, q13
+ aesimc.8 q0, q0
+ CLEAR_REG(q13)
+
+ aesd.8 q0, q14
+ veor q0, q15
+ CLEAR_REG(q14)
+ CLEAR_REG(q15)
+
+ vst1.8 {q0}, [r1]
+ CLEAR_REG(q0)
+
+ mov r0, #0
+ bx lr
+
+.Ldec1_192:
+ aesd.8 q0, q1
+ aesimc.8 q0, q0
+ vmov q1, q3
+
+ aesd.8 q0, q2
+ aesimc.8 q0, q0
+ vldm r0!, {q2-q3} /* load 3 round keys */
+
+ b .Ldec1_tail
+
+.Ldec1_256:
+ vldm r0!, {q15} /* load 1 round key */
+ aesd.8 q0, q1
+ aesimc.8 q0, q0
+
+ aesd.8 q0, q2
+ aesimc.8 q0, q0
+
+ aesd.8 q0, q3
+ aesimc.8 q0, q0
+ vldm r0!, {q1-q3} /* load 3 round keys */
+
+ aesd.8 q0, q15
+ aesimc.8 q0, q0
+
+ b .Ldec1_tail
+.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce;
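[Editor's note] The decryption path mirrors the encryption sketch above using AESD/AESIMC, i.e. the "equivalent inverse cipher": the middle round keys must have InvMixColumns applied to them during key setup, which appears to be what _gcry_aes_invmixcol_armv8_ce near the end of this file is there for. A hedged intrinsics sketch, assuming drk[] already holds those transformed keys in decryption order:

    #include <arm_neon.h>   /* build with the ARMv8 crypto extension enabled */

    static uint8x16_t
    aes128_decrypt_block_ce (uint8x16_t block, const uint8x16_t drk[11])
    {
      int i;

      for (i = 0; i < 9; i++)
        {
          block = vaesdq_u8 (block, drk[i]);  /* AddRoundKey+InvSubBytes+InvShiftRows */
          block = vaesimcq_u8 (block);        /* InvMixColumns */
        }
      block = vaesdq_u8 (block, drk[9]);
      return veorq_u8 (block, drk[10]);
    }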
+
+
+/*
+ * void _gcry_aes_cbc_enc_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *iv, size_t nblocks,
+ * int cbc_mac, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cbc_enc_armv8_ce
+.type _gcry_aes_cbc_enc_armv8_ce,%function;
+_gcry_aes_cbc_enc_armv8_ce:
+ /* input:
+ * r0: keysched
+ * r1: outbuf
+ * r2: inbuf
+ * r3: iv
+ * %st+0: nblocks => r4
+ * %st+4: cbc_mac => r5
+ * %st+8: nrounds => r6
+ */
+
+ push {r4-r6,lr} /* 4*4 = 16b */
+ ldr r4, [sp, #(16+0)]
+ ldr r5, [sp, #(16+4)]
+ cmp r4, #0
+ ldr r6, [sp, #(16+8)]
+ beq .Lcbc_enc_skip
+ cmp r5, #0
+ vpush {q4-q7}
+ moveq r5, #16
+ movne r5, #0
+
+ cmp r6, #12
+ vld1.8 {q1}, [r3] /* load IV */
+
+ aes_preload_keys(r0, lr);
+
+ beq .Lcbc_enc_loop192
+ bhi .Lcbc_enc_loop256
+
+#define CBC_ENC(bits, ...) \
+ .Lcbc_enc_loop##bits: \
+ vld1.8 {q0}, [r2]!; /* load plaintext */ \
+ veor q1, q0, q1; \
+ subs r4, r4, #1; \
+ \
+ do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \
+ \
+ vst1.8 {q1}, [r1], r5; /* store ciphertext */ \
+ \
+ bne .Lcbc_enc_loop##bits; \
+ b .Lcbc_enc_done;
+
+ CBC_ENC(128)
+ CBC_ENC(192, r0, lr)
+ CBC_ENC(256, r0, lr)
+
+#undef CBC_ENC
+
+.Lcbc_enc_done:
+ vst1.8 {q1}, [r3] /* store IV */
+
+ CLEAR_REG(q0)
+ CLEAR_REG(q1)
+ CLEAR_REG(q2)
+ CLEAR_REG(q3)
+ CLEAR_REG(q8)
+ CLEAR_REG(q9)
+ vpop {q4-q7}
+ CLEAR_REG(q10)
+ CLEAR_REG(q11)
+ CLEAR_REG(q12)
+ CLEAR_REG(q13)
+ CLEAR_REG(q14)
+
+.Lcbc_enc_skip:
+ pop {r4-r6,pc}
+.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce;
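[Editor's note] For reference, the loop generated by CBC_ENC has the following shape in C. The block cipher call is left abstract; when cbc_mac is non-zero the output stride is 0, so only the last ciphertext block (the CBC-MAC value) survives, matching the moveq/movne setup of r5 above. Names are illustrative.

    #include <stdint.h>
    #include <string.h>

    typedef void (*aes_enc_fn_t) (const void *keysched, uint8_t dst[16],
                                  const uint8_t src[16]);

    static void
    cbc_enc_ref (const void *keysched, aes_enc_fn_t enc_block,
                 uint8_t *outbuf, const uint8_t *inbuf,
                 uint8_t iv[16], size_t nblocks, int cbc_mac)
    {
      size_t outstep = cbc_mac ? 0 : 16;
      size_t i;
      int j;

      for (i = 0; i < nblocks; i++, inbuf += 16, outbuf += outstep)
        {
          for (j = 0; j < 16; j++)
            iv[j] ^= inbuf[j];             /* chaining value xor plaintext */
          enc_block (keysched, iv, iv);    /* ciphertext is next chaining value */
          memcpy (outbuf, iv, 16);
        }
    }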
+
+
+/*
+ * void _gcry_aes_cbc_dec_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cbc_dec_armv8_ce
+.type _gcry_aes_cbc_dec_armv8_ce,%function;
+_gcry_aes_cbc_dec_armv8_ce:
+ /* input:
+ * r0: keysched
+ * r1: outbuf
+ * r2: inbuf
+ * r3: iv
+ * %st+0: nblocks => r4
+ * %st+4: nrounds => r5
+ */
+
+ push {r4-r6,lr} /* 4*4 = 16b */
+ ldr r4, [sp, #(16+0)]
+ ldr r5, [sp, #(16+4)]
+ cmp r4, #0
+ beq .Lcbc_dec_skip
+ vpush {q4-q7}
+
+ cmp r5, #12
+ vld1.8 {q0}, [r3] /* load IV */
+
+ aes_preload_keys(r0, r6);
+
+ beq .Lcbc_dec_entry_192
+ bhi .Lcbc_dec_entry_256
+
+#define CBC_DEC(bits, ...) \
+ .Lcbc_dec_entry_##bits: \
+ cmp r4, #4; \
+ blo .Lcbc_dec_loop_##bits; \
+ \
+ .Lcbc_dec_loop4_##bits: \
+ \
+ vld1.8 {q1-q2}, [r2]!; /* load ciphertext */ \
+ sub r4, r4, #4; \
+ vld1.8 {q3-q4}, [r2]; /* load ciphertext */ \
+ cmp r4, #4; \
+ sub r2, #32; \
+ \
+ do_aes_4_##bits(d, imc, q1, q2, q3, q4, ##__VA_ARGS__); \
+ \
+ veor q1, q1, q0; \
+ vld1.8 {q0}, [r2]!; /* load next IV */ \
+ veor q2, q2, q0; \
+ vld1.8 {q0}, [r2]!; /* load next IV */ \
+ vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \
+ veor q3, q3, q0; \
+ vld1.8 {q0}, [r2]!; /* load next IV */ \
+ veor q4, q4, q0; \
+ vld1.8 {q0}, [r2]!; /* load next IV */ \
+ vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \
+ \
+ bhs .Lcbc_dec_loop4_##bits; \
+ cmp r4, #0; \
+ beq .Lcbc_dec_done; \
+ \
+ .Lcbc_dec_loop_##bits: \
+ vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+ subs r4, r4, #1; \
+ vmov q2, q1; \
+ \
+ do_aes_one##bits(d, imc, q1, q1, ##__VA_ARGS__); \
+ \
+ veor q1, q1, q0; \
+ vmov q0, q2; \
+ vst1.8 {q1}, [r1]!; /* store plaintext */ \
+ \
+ bne .Lcbc_dec_loop_##bits; \
+ b .Lcbc_dec_done;
+
+ CBC_DEC(128)
+ CBC_DEC(192, r0, r6)
+ CBC_DEC(256, r0, r6)
+
+#undef CBC_DEC
+
+.Lcbc_dec_done:
+ vst1.8 {q0}, [r3] /* store IV */
+
+ CLEAR_REG(q0)
+ CLEAR_REG(q1)
+ CLEAR_REG(q2)
+ CLEAR_REG(q3)
+ CLEAR_REG(q8)
+ CLEAR_REG(q9)
+ vpop {q4-q7}
+ CLEAR_REG(q10)
+ CLEAR_REG(q11)
+ CLEAR_REG(q12)
+ CLEAR_REG(q13)
+ CLEAR_REG(q14)
+
+.Lcbc_dec_skip:
+ pop {r4-r6,pc}
+.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce;
+
+
+/*
+ * void _gcry_aes_cfb_enc_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cfb_enc_armv8_ce
+.type _gcry_aes_cfb_enc_armv8_ce,%function;
+_gcry_aes_cfb_enc_armv8_ce:
+ /* input:
+ * r0: keysched
+ * r1: outbuf
+ * r2: inbuf
+ * r3: iv
+ * %st+0: nblocks => r4
+ * %st+4: nrounds => r5
+ */
+
+ push {r4-r6,lr} /* 4*4 = 16b */
+ ldr r4, [sp, #(16+0)]
+ ldr r5, [sp, #(16+4)]
+ cmp r4, #0
+ beq .Lcfb_enc_skip
+ vpush {q4-q7}
+
+ cmp r5, #12
+ vld1.8 {q0}, [r3] /* load IV */
+
+ aes_preload_keys(r0, r6);
+
+ beq .Lcfb_enc_entry_192
+ bhi .Lcfb_enc_entry_256
+
+#define CFB_ENC(bits, ...) \
+ .Lcfb_enc_entry_##bits: \
+ .Lcfb_enc_loop_##bits: \
+ vld1.8 {q1}, [r2]!; /* load plaintext */ \
+ subs r4, r4, #1; \
+ \
+ do_aes_one##bits(e, mc, q0, q0, ##__VA_ARGS__); \
+ \
+ veor q0, q1, q0; \
+ vst1.8 {q0}, [r1]!; /* store ciphertext */ \
+ \
+ bne .Lcfb_enc_loop_##bits; \
+ b .Lcfb_enc_done;
+
+ CFB_ENC(128)
+ CFB_ENC(192, r0, r6)
+ CFB_ENC(256, r0, r6)
+
+#undef CFB_ENC
+
+.Lcfb_enc_done:
+ vst1.8 {q0}, [r3] /* store IV */
+
+ CLEAR_REG(q0)
+ CLEAR_REG(q1)
+ CLEAR_REG(q2)
+ CLEAR_REG(q3)
+ CLEAR_REG(q8)
+ CLEAR_REG(q9)
+ vpop {q4-q7}
+ CLEAR_REG(q10)
+ CLEAR_REG(q11)
+ CLEAR_REG(q12)
+ CLEAR_REG(q13)
+ CLEAR_REG(q14)
+
+.Lcfb_enc_skip:
+ pop {r4-r6,pc}
+.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce;
+
+
+/*
+ * void _gcry_aes_cfb_dec_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cfb_dec_armv8_ce
+.type _gcry_aes_cfb_dec_armv8_ce,%function;
+_gcry_aes_cfb_dec_armv8_ce:
+ /* input:
+ * r0: keysched
+ * r1: outbuf
+ * r2: inbuf
+ * r3: iv
+ * %st+0: nblocks => r4
+ * %st+4: nrounds => r5
+ */
+
+ push {r4-r6,lr} /* 4*4 = 16b */
+ ldr r4, [sp, #(16+0)]
+ ldr r5, [sp, #(16+4)]
+ cmp r4, #0
+ beq .Lcfb_dec_skip
+ vpush {q4-q7}
+
+ cmp r5, #12
+ vld1.8 {q0}, [r3] /* load IV */
+
+ aes_preload_keys(r0, r6);
+
+ beq .Lcfb_dec_entry_192
+ bhi .Lcfb_dec_entry_256
+
+#define CFB_DEC(bits, ...) \
+ .Lcfb_dec_entry_##bits: \
+ cmp r4, #4; \
+ blo .Lcfb_dec_loop_##bits; \
+ \
+ .Lcfb_dec_loop4_##bits: \
+ \
+ vld1.8 {q2-q3}, [r2]!; /* load ciphertext */ \
+ vmov q1, q0; \
+ sub r4, r4, #4; \
+ vld1.8 {q4}, [r2]; /* load ciphertext */ \
+ sub r2, #32; \
+ cmp r4, #4; \
+ \
+ do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \
+ \
+ vld1.8 {q0}, [r2]!; /* load ciphertext */ \
+ veor q1, q1, q0; \
+ vld1.8 {q0}, [r2]!; /* load ciphertext */ \
+ veor q2, q2, q0; \
+ vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \
+ vld1.8 {q0}, [r2]!; \
+ veor q3, q3, q0; \
+ vld1.8 {q0}, [r2]!; /* load next IV / ciphertext */ \
+ veor q4, q4, q0; \
+ vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \
+ \
+ bhs .Lcfb_dec_loop4_##bits; \
+ cmp r4, #0; \
+ beq .Lcfb_dec_done; \
+ \
+ .Lcfb_dec_loop_##bits: \
+ \
+ vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+ \
+ subs r4, r4, #1; \
+ \
+ do_aes_one##bits(e, mc, q0, q0, ##__VA_ARGS__); \
+ \
+ veor q2, q1, q0; \
+ vmov q0, q1; \
+ vst1.8 {q2}, [r1]!; /* store plaintext */ \
+ \
+ bne .Lcfb_dec_loop_##bits; \
+ b .Lcfb_dec_done;
+
+ CFB_DEC(128)
+ CFB_DEC(192, r0, r6)
+ CFB_DEC(256, r0, r6)
+
+#undef CFB_DEC
+
+.Lcfb_dec_done:
+ vst1.8 {q0}, [r3] /* store IV */
+
+ CLEAR_REG(q0)
+ CLEAR_REG(q1)
+ CLEAR_REG(q2)
+ CLEAR_REG(q3)
+ CLEAR_REG(q8)
+ CLEAR_REG(q9)
+ vpop {q4-q7}
+ CLEAR_REG(q10)
+ CLEAR_REG(q11)
+ CLEAR_REG(q12)
+ CLEAR_REG(q13)
+ CLEAR_REG(q14)
+
+.Lcfb_dec_skip:
+ pop {r4-r6,pc}
+.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce;
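[Editor's note] The single-block tail of CFB_DEC amounts to the following: the cipher only ever runs in the forward direction, the previous ciphertext block is encrypted to make the keystream, and the current ciphertext becomes the next IV. A self-contained sketch with the same abstract block-cipher callback shape as the CBC sketch earlier:

    #include <stdint.h>
    #include <string.h>

    typedef void (*aes_enc_fn_t) (const void *keysched, uint8_t dst[16],
                                  const uint8_t src[16]);

    static void
    cfb_dec_ref (const void *keysched, aes_enc_fn_t enc_block,
                 uint8_t *outbuf, const uint8_t *inbuf,
                 uint8_t iv[16], size_t nblocks)
    {
      uint8_t keystream[16];
      size_t i;
      int j;

      for (i = 0; i < nblocks; i++, inbuf += 16, outbuf += 16)
        {
          enc_block (keysched, keystream, iv);  /* keystream = E_K(IV) */
          memcpy (iv, inbuf, 16);               /* next IV = this ciphertext */
          for (j = 0; j < 16; j++)
            outbuf[j] = inbuf[j] ^ keystream[j];
        }
    }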
+
+
+/*
+ * void _gcry_aes_ctr_enc_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_ctr_enc_armv8_ce
+.type _gcry_aes_ctr_enc_armv8_ce,%function;
+_gcry_aes_ctr_enc_armv8_ce:
+ /* input:
+ * r0: keysched
+ * r1: outbuf
+ * r2: inbuf
+ * r3: iv
+ * %st+0: nblocks => r4
+ * %st+4: nrounds => r5
+ */
+
+ vpush {q4-q7}
+ push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+ ldr r4, [sp, #(104+0)]
+ ldr r5, [sp, #(104+4)]
+ cmp r4, #0
+ beq .Lctr_enc_skip
+
+ cmp r5, #12
+ ldm r3, {r7-r10}
+ vld1.8 {q0}, [r3] /* load IV */
+ rev r7, r7
+ rev r8, r8
+ rev r9, r9
+ rev r10, r10
+
+ aes_preload_keys(r0, r6);
+
+ beq .Lctr_enc_entry_192
+ bhi .Lctr_enc_entry_256
+
+#define CTR_ENC(bits, ...) \
+ .Lctr_enc_entry_##bits: \
+ cmp r4, #4; \
+ blo .Lctr_enc_loop_##bits; \
+ \
+ .Lctr_enc_loop4_##bits: \
+ cmp r10, #0xfffffffc; \
+ sub r4, r4, #4; \
+ blo .Lctr_enc_loop4_##bits##_nocarry; \
+ cmp r9, #0xffffffff; \
+ bne .Lctr_enc_loop4_##bits##_nocarry; \
+ \
+ adds r10, #1; \
+ vmov q1, q0; \
+ blcs .Lctr_overflow_one; \
+ rev r11, r10; \
+ vmov.32 d1[1], r11; \
+ \
+ adds r10, #1; \
+ vmov q2, q0; \
+ blcs .Lctr_overflow_one; \
+ rev r11, r10; \
+ vmov.32 d1[1], r11; \
+ \
+ adds r10, #1; \
+ vmov q3, q0; \
+ blcs .Lctr_overflow_one; \
+ rev r11, r10; \
+ vmov.32 d1[1], r11; \
+ \
+ adds r10, #1; \
+ vmov q4, q0; \
+ blcs .Lctr_overflow_one; \
+ rev r11, r10; \
+ vmov.32 d1[1], r11; \
+ \
+ b .Lctr_enc_loop4_##bits##_store_ctr; \
+ \
+ .Lctr_enc_loop4_##bits##_nocarry: \
+ \
+ veor q2, q2; \
+ vrev64.8 q1, q0; \
+ vceq.u32 d5, d5; \
+ vadd.u64 q3, q2, q2; \
+ vadd.u64 q4, q3, q2; \
+ vadd.u64 q0, q3, q3; \
+ vsub.u64 q2, q1, q2; \
+ vsub.u64 q3, q1, q3; \
+ vsub.u64 q4, q1, q4; \
+ vsub.u64 q0, q1, q0; \
+ vrev64.8 q1, q1; \
+ vrev64.8 q2, q2; \
+ vrev64.8 q3, q3; \
+ vrev64.8 q0, q0; \
+ vrev64.8 q4, q4; \
+ add r10, #4; \
+ \
+ .Lctr_enc_loop4_##bits##_store_ctr: \
+ \
+ vst1.8 {q0}, [r3]; \
+ cmp r4, #4; \
+ vld1.8 {q0}, [r2]!; /* load ciphertext */ \
+ \
+ do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \
+ \
+ veor q1, q1, q0; \
+ vld1.8 {q0}, [r2]!; /* load ciphertext */ \
+ vst1.8 {q1}, [r1]!; /* store plaintext */ \
+ vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+ veor q2, q2, q0; \
+ veor q3, q3, q1; \
+ vld1.8 {q0}, [r2]!; /* load ciphertext */ \
+ vst1.8 {q2}, [r1]!; /* store plaintext */ \
+ veor q4, q4, q0; \
+ vld1.8 {q0}, [r3]; /* reload IV */ \
+ vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \
+ \
+ bhs .Lctr_enc_loop4_##bits; \
+ cmp r4, #0; \
+ beq .Lctr_enc_done; \
+ \
+ .Lctr_enc_loop_##bits: \
+ \
+ adds r10, #1; \
+ vmov q1, q0; \
+ blcs .Lctr_overflow_one; \
+ rev r11, r10; \
+ subs r4, r4, #1; \
+ vld1.8 {q2}, [r2]!; /* load ciphertext */ \
+ vmov.32 d1[1], r11; \
+ \
+ do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \
+ \
+ veor q1, q2, q1; \
+ vst1.8 {q1}, [r1]!; /* store plaintext */ \
+ \
+ bne .Lctr_enc_loop_##bits; \
+ b .Lctr_enc_done;
+
+ CTR_ENC(128)
+ CTR_ENC(192, r0, r6)
+ CTR_ENC(256, r0, r6)
+
+#undef CTR_ENC
+
+.Lctr_enc_done:
+ vst1.8 {q0}, [r3] /* store IV */
+
+ CLEAR_REG(q0)
+ CLEAR_REG(q1)
+ CLEAR_REG(q2)
+ CLEAR_REG(q3)
+ CLEAR_REG(q8)
+ CLEAR_REG(q9)
+ CLEAR_REG(q10)
+ CLEAR_REG(q11)
+ CLEAR_REG(q12)
+ CLEAR_REG(q13)
+ CLEAR_REG(q14)
+
+.Lctr_enc_skip:
+ pop {r4-r12,lr}
+ vpop {q4-q7}
+ bx lr
+
+.Lctr_overflow_one:
+ adcs r9, #0
+ adcs r8, #0
+ adc r7, #0
+ rev r11, r9
+ rev r12, r8
+ vmov.32 d1[0], r11
+ rev r11, r7
+ vmov.32 d0[1], r12
+ vmov.32 d0[0], r11
+ bx lr
+.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce;
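[Editor's note] The counter handled by .Lctr_overflow_one is the usual 128-bit big-endian CTR counter; r7-r10 hold it byte-swapped so the carry can ripple with adds/adcs. A portable equivalent of one increment:

    #include <stdint.h>

    /* Increment a 128-bit big-endian counter by one (the operation the
     * adds/adcs chain above performs word by word). */
    static void
    ctr128_inc_be (uint8_t ctr[16])
    {
      int i;

      for (i = 15; i >= 0; i--)
        if (++ctr[i] != 0)
          break;    /* stop as soon as a byte does not wrap around */
    }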
+
+
+/*
+ * void _gcry_aes_ocb_enc_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *offset,
+ * unsigned char *checksum,
+ * unsigned char *L_table,
+ * size_t nblocks,
+ * unsigned int nrounds,
+ * unsigned int blkn);
+ */
+
+.align 3
+.globl _gcry_aes_ocb_enc_armv8_ce
+.type _gcry_aes_ocb_enc_armv8_ce,%function;
+_gcry_aes_ocb_enc_armv8_ce:
+ /* input:
+ * r0: keysched
+ * r1: outbuf
+ * r2: inbuf
+ * r3: offset
+ * %st+0: checksum => r4
+ * %st+4: Ls => r5
+ * %st+8: nblocks => r6 (0 < nblocks <= 32)
+ * %st+12: nrounds => r7
+ * %st+16: blkn => lr
+ */
+
+ vpush {q4-q7}
+ push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+ ldr r7, [sp, #(104+12)]
+ ldr r4, [sp, #(104+0)]
+ ldr r5, [sp, #(104+4)]
+ ldr r6, [sp, #(104+8)]
+ ldr lr, [sp, #(104+16)]
+
+ cmp r7, #12
+ vld1.8 {q0}, [r3] /* load offset */
+
+ aes_preload_keys(r0, r12);
+
+ beq .Locb_enc_entry_192
+ bhi .Locb_enc_entry_256
+
+#define OCB_ENC(bits, ...) \
+ .Locb_enc_entry_##bits: \
+ cmp r6, #4; \
+ add lr, #1; \
+ blo .Locb_enc_loop_##bits; \
+ \
+ .Locb_enc_loop4_##bits: \
+ \
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+ /* Checksum_i = Checksum_{i-1} xor P_i */ \
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ \
+ \
+ add r9, lr, #1; \
+ add r10, lr, #2; \
+ add r11, lr, #3; \
+ rbit r8, lr; \
+ add lr, lr, #4; \
+ rbit r9, r9; \
+ rbit r10, r10; \
+ rbit r11, r11; \
+ clz r8, r8; /* ntz(i+0) */ \
+ clz r9, r9; /* ntz(i+1) */ \
+ clz r10, r10; /* ntz(i+2) */ \
+ clz r11, r11; /* ntz(i+3) */ \
+ add r8, r5, r8, lsl #4; \
+ add r9, r5, r9, lsl #4; \
+ add r10, r5, r10, lsl #4; \
+ add r11, r5, r11, lsl #4; \
+ \
+ sub r6, #4; \
+ \
+ vld1.8 {q9}, [r8]; /* load L_{ntz(i+0)} */ \
+ vld1.8 {q1-q2}, [r2]!; /* load P_i+<0-1> */ \
+ vld1.8 {q8}, [r4]; /* load Checksum_{i-1} */ \
+ veor q0, q0, q9; /* Offset_i+0 */ \
+ vld1.8 {q9}, [r9]; /* load L_{ntz(i+1)} */ \
+ veor q8, q8, q1; /* Checksum_i+0 */ \
+ veor q1, q1, q0; /* P_i+0 xor Offset_i+0 */\
+ vld1.8 {q3-q4}, [r2]!; /* load P_i+<2-3> */ \
+ vst1.8 {q0}, [r1]!; /* store Offset_i+0 */\
+ veor q0, q0, q9; /* Offset_i+1 */ \
+ vld1.8 {q9}, [r10]; /* load L_{ntz(i+2)} */ \
+ veor q8, q8, q2; /* Checksum_i+1 */ \
+ veor q2, q2, q0; /* P_i+1 xor Offset_i+1 */\
+ vst1.8 {q0}, [r1]!; /* store Offset_i+1 */\
+ veor q0, q0, q9; /* Offset_i+2 */ \
+ vld1.8 {q9}, [r11]; /* load L_{ntz(i+3)} */ \
+ veor q8, q8, q3; /* Checksum_i+2 */ \
+ veor q3, q3, q0; /* P_i+2 xor Offset_i+2 */\
+ vst1.8 {q0}, [r1]!; /* store Offset_i+2 */\
+ veor q0, q0, q9; /* Offset_i+3 */ \
+ veor q8, q8, q4; /* Checksum_i+3 */ \
+ veor q4, q4, q0; /* P_i+3 xor Offset_i+3 */\
+ vst1.8 {q0}, [r1]; /* store Offset_i+3 */\
+ sub r1, #(3*16); \
+ vst1.8 {q8}, [r4]; /* store Checksum_i+3 */\
+ \
+ cmp r6, #4; \
+ \
+ do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \
+ \
+ mov r8, r1; \
+ vld1.8 {q8-q9}, [r1]!; \
+ veor q1, q1, q8; \
+ veor q2, q2, q9; \
+ vld1.8 {q8-q9}, [r1]!; \
+ vst1.8 {q1-q2}, [r8]!; \
+ veor q3, q3, q8; \
+ veor q4, q4, q9; \
+ vst1.8 {q3-q4}, [r8]; \
+ \
+ bhs .Locb_enc_loop4_##bits; \
+ cmp r6, #0; \
+ beq .Locb_enc_done; \
+ \
+ .Locb_enc_loop_##bits: \
+ \
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+ /* Checksum_i = Checksum_{i-1} xor P_i */ \
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ \
+ \
+ rbit r8, lr; \
+ add lr, #1; \
+ clz r8, r8; /* ntz(i) */ \
+ add r8, r5, r8, lsl #4; \
+ \
+ vld1.8 {q1}, [r2]!; /* load plaintext */ \
+ vld1.8 {q2}, [r8]; /* load L_{ntz(i)} */ \
+ vld1.8 {q3}, [r4]; /* load checksum */ \
+ subs r6, #1; \
+ veor q0, q0, q2; \
+ veor q3, q3, q1; \
+ veor q1, q1, q0; \
+ vst1.8 {q3}, [r4]; /* store checksum */ \
+ \
+ do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \
+ \
+ veor q1, q1, q0; \
+ vst1.8 {q1}, [r1]!; /* store ciphertext */ \
+ \
+ bne .Locb_enc_loop_##bits; \
+ b .Locb_enc_done;
+
+ OCB_ENC(128re, r0, r12)
+ OCB_ENC(192, r0, r12)
+ OCB_ENC(256, r0, r12)
+
+#undef OCB_ENC
+
+.Locb_enc_done:
+ vst1.8 {q0}, [r3] /* store offset */
+
+ CLEAR_REG(q0)
+ CLEAR_REG(q1)
+ CLEAR_REG(q2)
+ CLEAR_REG(q3)
+ CLEAR_REG(q8)
+ CLEAR_REG(q9)
+ CLEAR_REG(q10)
+ CLEAR_REG(q11)
+ CLEAR_REG(q12)
+ CLEAR_REG(q13)
+ CLEAR_REG(q14)
+
+ pop {r4-r12,lr}
+ vpop {q4-q7}
+ bx lr
+.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce;
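[Editor's note] The rbit/clz pair in OCB_ENC computes ntz(i), the number of trailing zero bits of the (1-based) block number, which selects the precomputed L-table entry xored into the running offset, as in RFC 7253. Sketch with illustrative names:

    #include <stdint.h>

    /* ntz(i) for i > 0; rbit+clz in the assembly computes the same value. */
    static unsigned int
    ntz32 (uint32_t i)
    {
      unsigned int n = 0;

      while ((i & 1) == 0)
        {
          i >>= 1;
          n++;
        }
      return n;
    }

    /* Offset_i = Offset_{i-1} xor L_{ntz(i)}, with 16-byte L-table entries. */
    static void
    ocb_update_offset (uint8_t offset[16], const uint8_t *L_table, uint32_t blkn)
    {
      const uint8_t *L = L_table + 16 * ntz32 (blkn);
      int j;

      for (j = 0; j < 16; j++)
        offset[j] ^= L[j];
    }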
+
+
+/*
+ * void _gcry_aes_ocb_dec_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *offset,
+ * unsigned char *checksum,
+ * unsigned char *L_table,
+ * size_t nblocks,
+ * unsigned int nrounds,
+ * unsigned int blkn);
+ */
+
+.align 3
+.globl _gcry_aes_ocb_dec_armv8_ce
+.type _gcry_aes_ocb_dec_armv8_ce,%function;
+_gcry_aes_ocb_dec_armv8_ce:
+ /* input:
+ * r0: keysched
+ * r1: outbuf
+ * r2: inbuf
+ * r3: offset
+ * %st+0: checksum => r4
+ * %st+4: Ls => r5
+ * %st+8: nblocks => r6 (0 < nblocks <= 32)
+ * %st+12: nrounds => r7
+ * %st+16: blkn => lr
+ */
+
+ vpush {q4-q7}
+ push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+ ldr r7, [sp, #(104+12)]
+ ldr r4, [sp, #(104+0)]
+ ldr r5, [sp, #(104+4)]
+ ldr r6, [sp, #(104+8)]
+ ldr lr, [sp, #(104+16)]
+
+ cmp r7, #12
+ vld1.8 {q0}, [r3] /* load offset */
+
+ aes_preload_keys(r0, r12);
+
+ beq .Locb_dec_entry_192
+ bhi .Locb_dec_entry_256
+
+#define OCB_DEC(bits, ...) \
+ .Locb_dec_entry_##bits: \
+ cmp r6, #4; \
+ add lr, #1; \
+ blo .Locb_dec_loop_##bits; \
+ \
+ .Locb_dec_loop4_##bits: \
+ \
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ \
+ /* Checksum_i = Checksum_{i-1} xor P_i */ \
+ \
+ add r9, lr, #1; \
+ add r10, lr, #2; \
+ add r11, lr, #3; \
+ rbit r8, lr; \
+ add lr, lr, #4; \
+ rbit r9, r9; \
+ rbit r10, r10; \
+ rbit r11, r11; \
+ clz r8, r8; /* ntz(i+0) */ \
+ clz r9, r9; /* ntz(i+1) */ \
+ clz r10, r10; /* ntz(i+2) */ \
+ clz r11, r11; /* ntz(i+3) */ \
+ add r8, r5, r8, lsl #4; \
+ add r9, r5, r9, lsl #4; \
+ add r10, r5, r10, lsl #4; \
+ add r11, r5, r11, lsl #4; \
+ \
+ sub r6, #4; \
+ \
+ vld1.8 {q9}, [r8]; /* load L_{ntz(i+0)} */ \
+ vld1.8 {q1-q2}, [r2]!; /* load P_i+<0-1> */ \
+ veor q0, q0, q9; /* Offset_i+0 */ \
+ vld1.8 {q9}, [r9]; /* load L_{ntz(i+1)} */ \
+ veor q1, q1, q0; /* P_i+0 xor Offset_i+0 */\
+ vld1.8 {q3-q4}, [r2]!; /* load P_i+<2-3> */ \
+ vst1.8 {q0}, [r1]!; /* store Offset_i+0 */\
+ veor q0, q0, q9; /* Offset_i+1 */ \
+ vld1.8 {q9}, [r10]; /* load L_{ntz(i+2)} */ \
+ veor q2, q2, q0; /* P_i+1 xor Offset_i+1 */\
+ vst1.8 {q0}, [r1]!; /* store Offset_i+1 */\
+ veor q0, q0, q9; /* Offset_i+2 */ \
+ vld1.8 {q9}, [r11]; /* load L_{ntz(i+3)} */ \
+ veor q3, q3, q0; /* P_i+2 xor Offset_i+2 */\
+ vst1.8 {q0}, [r1]!; /* store Offset_i+2 */\
+ veor q0, q0, q9; /* Offset_i+3 */ \
+ veor q4, q4, q0; /* P_i+3 xor Offset_i+3 */\
+ vst1.8 {q0}, [r1]; /* store Offset_i+3 */\
+ sub r1, #(3*16); \
+ \
+ cmp r6, #4; \
+ \
+ do_aes_4_##bits(d, imc, q1, q2, q3, q4, ##__VA_ARGS__); \
+ \
+ mov r8, r1; \
+ vld1.8 {q8-q9}, [r1]!; \
+ veor q1, q1, q8; \
+ veor q2, q2, q9; \
+ vld1.8 {q8-q9}, [r1]!; \
+ vst1.8 {q1-q2}, [r8]!; \
+ veor q1, q1, q2; \
+ vld1.8 {q2}, [r4]; /* load Checksum_{i-1} */ \
+ veor q3, q3, q8; \
+ veor q1, q1, q3; \
+ veor q4, q4, q9; \
+ veor q1, q1, q4; \
+ vst1.8 {q3-q4}, [r8]; \
+ veor q2, q2, q1; \
+ vst1.8 {q2}, [r4]; /* store Checksum_i+3 */ \
+ \
+ bhs .Locb_dec_loop4_##bits; \
+ cmp r6, #0; \
+ beq .Locb_dec_done; \
+ \
+ .Locb_dec_loop_##bits: \
+ \
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ \
+ /* Checksum_i = Checksum_{i-1} xor P_i */ \
+ \
+ rbit r8, lr; \
+ add lr, #1; \
+ clz r8, r8; /* ntz(i) */ \
+ add r8, r5, r8, lsl #4; \
+ \
+ vld1.8 {q2}, [r8]; /* load L_{ntz(i)} */ \
+ vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+ subs r6, #1; \
+ veor q0, q0, q2; \
+ veor q1, q1, q0; \
+ \
+ do_aes_one##bits(d, imc, q1, q1, ##__VA_ARGS__) \
+ \
+ vld1.8 {q2}, [r4]; /* load checksum */ \
+ veor q1, q1, q0; \
+ vst1.8 {q1}, [r1]!; /* store plaintext */ \
+ veor q2, q2, q1; \
+ vst1.8 {q2}, [r4]; /* store checksum */ \
+ \
+ bne .Locb_dec_loop_##bits; \
+ b .Locb_dec_done;
+
+ OCB_DEC(128re, r0, r12)
+ OCB_DEC(192, r0, r12)
+ OCB_DEC(256, r0, r12)
+
+#undef OCB_DEC
+
+.Locb_dec_done:
+ vst1.8 {q0}, [r3] /* store offset */
+
+ CLEAR_REG(q0)
+ CLEAR_REG(q1)
+ CLEAR_REG(q2)
+ CLEAR_REG(q3)
+ CLEAR_REG(q8)
+ CLEAR_REG(q9)
+ CLEAR_REG(q10)
+ CLEAR_REG(q11)
+ CLEAR_REG(q12)
+ CLEAR_REG(q13)
+ CLEAR_REG(q14)
+
+ pop {r4-r12,lr}
+ vpop {q4-q7}
+ bx lr
+.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce;
+
+
+/*
+ * void _gcry_aes_ocb_auth_armv8_ce (const void *keysched,
+ * const unsigned char *abuf,
+ * unsigned char *offset,
+ * unsigned char *checksum,
+ * unsigned char *L_table,
+ * size_t nblocks,
+ * unsigned int nrounds,
+ * unsigned int blkn);
+ */
+
+.align 3
+.globl _gcry_aes_ocb_auth_armv8_ce
+.type _gcry_aes_ocb_auth_armv8_ce,%function;
+_gcry_aes_ocb_auth_armv8_ce:
+ /* input:
+ * r0: keysched
+ * r1: abuf
+ * r2: offset
+ * r3: checksum
+ * %st+0: Ls => r5
+ * %st+4: nblocks => r6 (0 < nblocks <= 32)
+ * %st+8: nrounds => r7
+ * %st+12: blkn => lr
+ */
+
+ vpush {q4-q7}
+ push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+ ldr r7, [sp, #(104+8)]
+ ldr r5, [sp, #(104+0)]
+ ldr r6, [sp, #(104+4)]
+ ldr lr, [sp, #(104+12)]
+
+ cmp r7, #12
+ vld1.8 {q0}, [r2] /* load offset */
+
+ aes_preload_keys(r0, r12);
+
+ beq .Locb_auth_entry_192
+ bhi .Locb_auth_entry_256
+
+#define OCB_AUTH(bits, ...) \
+ .Locb_auth_entry_##bits: \
+ cmp r6, #4; \
+ add lr, #1; \
+ blo .Locb_auth_loop_##bits; \
+ \
+ .Locb_auth_loop4_##bits: \
+ \
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ \
+ \
+ add r9, lr, #1; \
+ add r10, lr, #2; \
+ add r11, lr, #3; \
+ rbit r8, lr; \
+ add lr, lr, #4; \
+ rbit r9, r9; \
+ rbit r10, r10; \
+ rbit r11, r11; \
+ clz r8, r8; /* ntz(i+0) */ \
+ clz r9, r9; /* ntz(i+1) */ \
+ clz r10, r10; /* ntz(i+2) */ \
+ clz r11, r11; /* ntz(i+3) */ \
+ add r8, r5, r8, lsl #4; \
+ add r9, r5, r9, lsl #4; \
+ add r10, r5, r10, lsl #4; \
+ add r11, r5, r11, lsl #4; \
+ \
+ sub r6, #4; \
+ \
+ vld1.8 {q9}, [r8]; /* load L_{ntz(i+0)} */ \
+ vld1.8 {q1-q2}, [r1]!; /* load A_i+<0-1> */ \
+ veor q0, q0, q9; /* Offset_i+0 */ \
+ vld1.8 {q9}, [r9]; /* load L_{ntz(i+1)} */ \
+ veor q1, q1, q0; /* A_i+0 xor Offset_i+0 */\
+ vld1.8 {q3-q4}, [r1]!; /* load A_i+<2-3> */ \
+ veor q0, q0, q9; /* Offset_i+1 */ \
+ vld1.8 {q9}, [r10]; /* load L_{ntz(i+2)} */ \
+ veor q2, q2, q0; /* A_i+1 xor Offset_i+1 */\
+ veor q0, q0, q9; /* Offset_i+2 */ \
+ vld1.8 {q9}, [r11]; /* load L_{ntz(i+3)} */ \
+ veor q3, q3, q0; /* A_i+2 xor Offset_i+2 */\
+ veor q0, q0, q9; /* Offset_i+3 */ \
+ veor q4, q4, q0; /* A_i+3 xor Offset_i+3 */\
+ \
+ cmp r6, #4; \
+ \
+ do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \
+ \
+ veor q1, q1, q2; \
+ veor q3, q3, q4; \
+ vld1.8 {q2}, [r3]; \
+ veor q1, q1, q3; \
+ veor q2, q2, q1; \
+ vst1.8 {q2}, [r3]; \
+ \
+ bhs .Locb_auth_loop4_##bits; \
+ cmp r6, #0; \
+ beq .Locb_auth_done; \
+ \
+ .Locb_auth_loop_##bits: \
+ \
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ \
+ \
+ rbit r8, lr; \
+ add lr, #1; \
+ clz r8, r8; /* ntz(i) */ \
+ add r8, r5, r8, lsl #4; \
+ \
+ vld1.8 {q2}, [r8]; /* load L_{ntz(i)} */ \
+ vld1.8 {q1}, [r1]!; /* load aadtext */ \
+ subs r6, #1; \
+ veor q0, q0, q2; \
+ vld1.8 {q2}, [r3]; /* load checksum */ \
+ veor q1, q1, q0; \
+ \
+ do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__) \
+ \
+ veor q2, q2, q1; \
+ vst1.8 {q2}, [r3]; /* store checksum */ \
+ \
+ bne .Locb_auth_loop_##bits; \
+ b .Locb_auth_done;
+
+ OCB_AUTH(128re, r0, r12)
+ OCB_AUTH(192, r0, r12)
+ OCB_AUTH(256, r0, r12)
+
+#undef OCB_AUTH
+
+.Locb_auth_done:
+ vst1.8 {q0}, [r2] /* store offset */
+
+ CLEAR_REG(q0)
+ CLEAR_REG(q1)
+ CLEAR_REG(q2)
+ CLEAR_REG(q3)
+ CLEAR_REG(q8)
+ CLEAR_REG(q9)
+ CLEAR_REG(q10)
+ CLEAR_REG(q11)
+ CLEAR_REG(q12)
+ CLEAR_REG(q13)
+ CLEAR_REG(q14)
+
+ pop {r4-r12,lr}
+ vpop {q4-q7}
+ bx lr
+.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;
+
+
+
+/*
+ * void _gcry_aes_xts_enc_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_xts_enc_armv8_ce
+.type _gcry_aes_xts_enc_armv8_ce,%function;
+_gcry_aes_xts_enc_armv8_ce:
+ /* input:
+ * r0: keysched
+ * r1: outbuf
+ * r2: inbuf
+ * r3: iv
+ * %st+0: nblocks => r4
+ * %st+4: nrounds => r5
+ */
+
+ vpush {q4-q7}
+ push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+ ldr r4, [sp, #(104+0)]
+ ldr r5, [sp, #(104+4)]
+ cmp r4, #0
+ beq .Lxts_enc_skip
+
+ cmp r5, #12
+
+ vld1.8 {q0}, [r3] /* load tweak */
+ mov r7, #0x87;
+
+ aes_preload_keys(r0, r6);
+
+ beq .Lxts_enc_entry_192
+ bhi .Lxts_enc_entry_256
+
+#define CTR_XTS(bits, ...) \
+ .Lxts_enc_entry_##bits: \
+ cmp r4, #4; \
+ blo .Lxts_enc_loop_##bits; \
+ \
+ .Lxts_enc_loop4_##bits: \
+ sub r4, r4, #4; \
+ veor q9, q9, q9; \
+ \
+ vld1.8 {q1-q2}, [r2]!; /* load plaintext */ \
+ veor q1, q1, q0; \
+ cmp r4, #4; \
+ vmov.u32 d18[0], r7; \
+ vst1.8 {q0}, [r1]!; /* store tweak0 to temp */ \
+ \
+ vshr.s64 d16, d1, #63; \
+ vshr.u64 d17, d0, #63; \
+ vadd.u64 q0, q0, q0; \
+ vand d16, d16, d18; \
+ veor q0, q0, q8; \
+ \
+ vld1.8 {q3-q4}, [r2]!; /* load plaintext */ \
+ veor q2, q2, q0; \
+ vst1.8 {q0}, [r1]!; /* store tweak1 to temp */ \
+ \
+ vshr.s64 d16, d1, #63; \
+ vshr.u64 d17, d0, #63; \
+ vadd.u64 q0, q0, q0; \
+ vand d16, d16, d18; \
+ veor q0, q0, q8; \
+ \
+ veor q3, q3, q0; \
+ vst1.8 {q0}, [r1]!; /* store tweak2 to temp */ \
+ \
+ vshr.s64 d16, d1, #63; \
+ vshr.u64 d17, d0, #63; \
+ vadd.u64 q0, q0, q0; \
+ vand d16, d16, d18; \
+ veor q0, q0, q8; \
+ \
+ veor q4, q4, q0; \
+ vst1.8 {q0}, [r1]; /* store tweak3 to temp */ \
+ sub r1, r1, #48; \
+ \
+ vshr.s64 d16, d1, #63; \
+ vshr.u64 d17, d0, #63; \
+ vadd.u64 q0, q0, q0; \
+ vand d16, d16, d18; \
+ veor q0, q0, q8; \
+ \
+ do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \
+ \
+ vld1.8 {q8-q9}, [r1]!; /* load tweak from temp */ \
+ veor q1, q1, q8; \
+ veor q2, q2, q9; \
+ vld1.8 {q8-q9}, [r1]; /* load tweak from temp */ \
+ sub r1, r1, #32; \
+ veor q3, q3, q8; \
+ veor q4, q4, q9; \
+ vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \
+ vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \
+ \
+ bhs .Lxts_enc_loop4_##bits; \
+ cmp r4, #0; \
+ beq .Lxts_enc_done; \
+ \
+ .Lxts_enc_loop_##bits: \
+ \
+ vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+ \
+ veor q9, q9, q9; \
+ veor q1, q1, q0; \
+ vmov.u32 d18[0], r7; \
+ vmov q2, q0; \
+ \
+ vshr.s64 d16, d1, #63; \
+ vshr.u64 d17, d0, #63; \
+ vadd.u64 q0, q0, q0; \
+ vand d16, d16, d18; \
+ veor q0, q0, q8; \
+ subs r4, r4, #1; \
+ \
+ do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \
+ \
+ veor q1, q1, q2; \
+ vst1.8 {q1}, [r1]!; /* store plaintext */ \
+ \
+ bne .Lxts_enc_loop_##bits; \
+ b .Lxts_enc_done;
+
+ CTR_XTS(128re, r0, r6)
+ CTR_XTS(192, r0, r6)
+ CTR_XTS(256, r0, r6)
+
+#undef CTR_XTS
+
+.Lxts_enc_done:
+ vst1.8 {q0}, [r3] /* store tweak */
+
+ CLEAR_REG(q0)
+ CLEAR_REG(q1)
+ CLEAR_REG(q2)
+ CLEAR_REG(q3)
+ CLEAR_REG(q8)
+ CLEAR_REG(q9)
+ CLEAR_REG(q10)
+ CLEAR_REG(q11)
+ CLEAR_REG(q12)
+ CLEAR_REG(q13)
+ CLEAR_REG(q14)
+
+.Lxts_enc_skip:
+ pop {r4-r12,lr}
+ vpop {q4-q7}
+ bx lr
+.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;
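[Editor's note] The recurring vshr/vadd/vand/veor block in CTR_XTS is the XTS tweak update: multiply the 128-bit tweak by x in GF(2^128), reducing with x^128 + x^7 + x^2 + x + 1 (the 0x87 constant loaded into r7). A byte-wise equivalent on the little-endian tweak:

    #include <stdint.h>

    static void
    xts_mult_x (uint8_t tweak[16])
    {
      unsigned int carry = 0;
      int i;

      for (i = 0; i < 16; i++)
        {
          unsigned int next_carry = tweak[i] >> 7;

          tweak[i] = (uint8_t)((tweak[i] << 1) | carry);
          carry = next_carry;
        }
      if (carry)
        tweak[0] ^= 0x87;   /* fold x^128 back in via the reduction polynomial */
    }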
+
+
+/*
+ * void _gcry_aes_xts_dec_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_xts_dec_armv8_ce
+.type _gcry_aes_xts_dec_armv8_ce,%function;
+_gcry_aes_xts_dec_armv8_ce:
+ /* input:
+ * r0: keysched
+ * r1: outbuf
+ * r2: inbuf
+ * r3: iv
+ * %st+0: nblocks => r4
+ * %st+4: nrounds => r5
+ */
+
+ vpush {q4-q7}
+ push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+ ldr r4, [sp, #(104+0)]
+ ldr r5, [sp, #(104+4)]
+ cmp r4, #0
+ beq .Lxts_dec_skip
+
+ cmp r5, #12
+
+ vld1.8 {q0}, [r3] /* load tweak */
+ mov r7, #0x87;
+
+ aes_preload_keys(r0, r6);
+
+ beq .Lxts_dec_entry_192
+ bhi .Lxts_dec_entry_256
+
+#define CTR_XTS(bits, ...) \
+ .Lxts_dec_entry_##bits: \
+ cmp r4, #4; \
+ blo .Lxts_dec_loop_##bits; \
+ \
+ .Lxts_dec_loop4_##bits: \
+ sub r4, r4, #4; \
+ veor q9, q9, q9; \
+ \
+ vld1.8 {q1-q2}, [r2]!; /* load plaintext */ \
+ veor q1, q1, q0; \
+ cmp r4, #4; \
+ vmov.u32 d18[0], r7; \
+ vst1.8 {q0}, [r1]!; /* store tweak0 to temp */ \
+ \
+ vshr.s64 d16, d1, #63; \
+ vshr.u64 d17, d0, #63; \
+ vadd.u64 q0, q0, q0; \
+ vand d16, d16, d18; \
+ veor q0, q0, q8; \
+ \
+ vld1.8 {q3-q4}, [r2]!; /* load plaintext */ \
+ veor q2, q2, q0; \
+ vst1.8 {q0}, [r1]!; /* store tweak1 to temp */ \
+ \
+ vshr.s64 d16, d1, #63; \
+ vshr.u64 d17, d0, #63; \
+ vadd.u64 q0, q0, q0; \
+ vand d16, d16, d18; \
+ veor q0, q0, q8; \
+ \
+ veor q3, q3, q0; \
+ vst1.8 {q0}, [r1]!; /* store tweak2 to temp */ \
+ \
+ vshr.s64 d16, d1, #63; \
+ vshr.u64 d17, d0, #63; \
+ vadd.u64 q0, q0, q0; \
+ vand d16, d16, d18; \
+ veor q0, q0, q8; \
+ \
+ veor q4, q4, q0; \
+ vst1.8 {q0}, [r1]; /* store tweak3 to temp */ \
+ sub r1, r1, #48; \
+ \
+ vshr.s64 d16, d1, #63; \
+ vshr.u64 d17, d0, #63; \
+ vadd.u64 q0, q0, q0; \
+ vand d16, d16, d18; \
+ veor q0, q0, q8; \
+ \
+ do_aes_4_##bits(d, imc, q1, q2, q3, q4, ##__VA_ARGS__); \
+ \
+ vld1.8 {q8-q9}, [r1]!; /* load tweak from temp */ \
+ veor q1, q1, q8; \
+ veor q2, q2, q9; \
+ vld1.8 {q8-q9}, [r1]; /* load tweak from temp */ \
+ sub r1, r1, #32; \
+ veor q3, q3, q8; \
+ veor q4, q4, q9; \
+ vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \
+ vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \
+ \
+ bhs .Lxts_dec_loop4_##bits; \
+ cmp r4, #0; \
+ beq .Lxts_dec_done; \
+ \
+ .Lxts_dec_loop_##bits: \
+ \
+ vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+ \
+ veor q9, q9, q9; \
+ veor q1, q1, q0; \
+ vmov.u32 d18[0], r7; \
+ vmov q2, q0; \
+ \
+ vshr.s64 d16, d1, #63; \
+ vshr.u64 d17, d0, #63; \
+ vadd.u64 q0, q0, q0; \
+ vand d16, d16, d18; \
+ veor q0, q0, q8; \
+ subs r4, r4, #1; \
+ \
+ do_aes_one##bits(d, imc, q1, q1, ##__VA_ARGS__); \
+ \
+ veor q1, q1, q2; \
+ vst1.8 {q1}, [r1]!; /* store plaintext */ \
+ \
+ bne .Lxts_dec_loop_##bits; \
+ b .Lxts_dec_done;
+
+ CTR_XTS(128re, r0, r6)
+ CTR_XTS(192, r0, r6)
+ CTR_XTS(256, r0, r6)
+
+#undef CTR_XTS
+
+.Lxts_dec_done:
+ vst1.8 {q0}, [r3] /* store tweak */
+
+ CLEAR_REG(q0)
+ CLEAR_REG(q1)
+ CLEAR_REG(q2)
+ CLEAR_REG(q3)
+ CLEAR_REG(q8)
+ CLEAR_REG(q9)
+ CLEAR_REG(q10)
+ CLEAR_REG(q11)
+ CLEAR_REG(q12)
+ CLEAR_REG(q13)
+ CLEAR_REG(q14)
+
+.Lxts_dec_skip:
+ pop {r4-r12,lr}
+ vpop {q4-q7}
+ bx lr
+.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;
+
+
+/*
+ * u32 _gcry_aes_sbox4_armv8_ce(u32 in4b);
+ */
+.align 3
+.globl _gcry_aes_sbox4_armv8_ce
+.type _gcry_aes_sbox4_armv8_ce,%function;
+_gcry_aes_sbox4_armv8_ce:
+ /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
+ * Cryptology — CT-RSA 2015" for details.
+ */
+ vmov.i8 q0, #0x52
+ vmov.i8 q1, #0
+ vmov s0, r0
+ aese.8 q0, q1
+ veor d0, d1
+ vpadd.i32 d0, d0, d1
+ vmov r0, s0
+ CLEAR_REG(q0)
+ bx lr
+.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce;
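[Editor's note] _gcry_aes_sbox4_armv8_ce relies on the fact that AESE with an all-zero round key reduces to SubBytes followed by ShiftRows; the unused lanes are pre-filled with 0x52 because S(0x52) = 0x00, so the veor/vpadd at the end can gather the four wanted bytes. The same idea applied to a full 16-byte vector, with ShiftRows undone explicitly (an illustrative sketch, not this function's exact lane bookkeeping):

    #include <arm_neon.h>   /* build with the ARMv8 crypto extension enabled */
    #include <stdint.h>

    /* Inverse of the ShiftRows byte permutation (column-major AES state). */
    static const uint8_t inv_shift_rows[16] =
      { 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 };

    /* Apply the AES S-box to 16 bytes at once using AESE with a zero key. */
    static void
    aes_sbox16_ce (uint8_t out[16], const uint8_t in[16])
    {
      uint8_t tmp[16];
      uint8x16_t v = vaeseq_u8 (vld1q_u8 (in), vdupq_n_u8 (0));
      int i;

      vst1q_u8 (tmp, v);
      for (i = 0; i < 16; i++)
        out[i] = tmp[inv_shift_rows[i]];   /* undo ShiftRows */
    }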
+
+
+/*
+ * void _gcry_aes_invmixcol_armv8_ce(void *dst, const void *src);
+ */
+.align 3
+.globl _gcry_aes_invmixcol_armv8_ce
+.type _gcry_aes_invmixcol_armv8_ce,%function;
+_gcry_aes_invmixcol_armv8_ce:
+ vld1.8 {q0}, [r1]
+ aesimc.8 q0, q0
+ vst1.8 {q0}, [r0]
+ CLEAR_REG(q0)
+ bx lr
+.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce;
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-armv8-aarch64-ce.S b/comm/third_party/libgcrypt/cipher/rijndael-armv8-aarch64-ce.S
new file mode 100644
index 0000000000..3af29e0d0c
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-armv8-aarch64-ce.S
@@ -0,0 +1,1613 @@
+/* rijndael-armv8-aarch64-ce.S - ARMv8/CE accelerated AES
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__) && \
+ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+
+.cpu generic+simd+crypto
+
+.text
+
+
+/* Register macros */
+
+#define vk0 v17
+#define vk1 v18
+#define vk2 v19
+#define vk3 v20
+#define vk4 v21
+#define vk5 v22
+#define vk6 v23
+#define vk7 v24
+#define vk8 v25
+#define vk9 v26
+#define vk10 v27
+#define vk11 v28
+#define vk12 v29
+#define vk13 v30
+#define vk14 v31
+
+
+/* AES macros */
+
+#define aes_preload_keys(keysched, nrounds) \
+ cmp nrounds, #12; \
+ ld1 {vk0.16b-vk3.16b}, [keysched], #64; \
+ ld1 {vk4.16b-vk7.16b}, [keysched], #64; \
+ ld1 {vk8.16b-vk10.16b}, [keysched], #48; \
+ b.lo 1f; \
+ ld1 {vk11.16b-vk12.16b}, [keysched], #32; \
+ b.eq 1f; \
+ ld1 {vk13.16b-vk14.16b}, [keysched]; \
+1: ;
+
+#define do_aes_one128(ed, mcimc, vo, vb) \
+ aes##ed vb.16b, vk0.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk1.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk2.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk3.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk4.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk5.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk6.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk7.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk8.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk9.16b; \
+ eor vo.16b, vb.16b, vk10.16b;
+
+#define do_aes_one192(ed, mcimc, vo, vb) \
+ aes##ed vb.16b, vk0.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk1.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk2.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk3.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk4.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk5.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk6.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk7.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk8.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk9.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk10.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk11.16b; \
+ eor vo.16b, vb.16b, vk12.16b;
+
+#define do_aes_one256(ed, mcimc, vo, vb) \
+ aes##ed vb.16b, vk0.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk1.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk2.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk3.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk4.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk5.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk6.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk7.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk8.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk9.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk10.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk11.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk12.16b; \
+ aes##mcimc vb.16b, vb.16b; \
+ aes##ed vb.16b, vk13.16b; \
+ eor vo.16b, vb.16b, vk14.16b;
+
+#define aes_round_4(ed, mcimc, b0, b1, b2, b3, key) \
+ aes##ed b0.16b, key.16b; \
+ aes##mcimc b0.16b, b0.16b; \
+ aes##ed b1.16b, key.16b; \
+ aes##mcimc b1.16b, b1.16b; \
+ aes##ed b2.16b, key.16b; \
+ aes##mcimc b2.16b, b2.16b; \
+ aes##ed b3.16b, key.16b; \
+ aes##mcimc b3.16b, b3.16b;
+
+#define aes_lastround_4(ed, b0, b1, b2, b3, key1, key2) \
+ aes##ed b0.16b, key1.16b; \
+ eor b0.16b, b0.16b, key2.16b; \
+ aes##ed b1.16b, key1.16b; \
+ eor b1.16b, b1.16b, key2.16b; \
+ aes##ed b2.16b, key1.16b; \
+ eor b2.16b, b2.16b, key2.16b; \
+ aes##ed b3.16b, key1.16b; \
+ eor b3.16b, b3.16b, key2.16b;
+
+#define do_aes_4_128(ed, mcimc, b0, b1, b2, b3) \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk0); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk1); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk2); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk3); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk4); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk5); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk6); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk7); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk8); \
+ aes_lastround_4(ed, b0, b1, b2, b3, vk9, vk10);
+
+#define do_aes_4_192(ed, mcimc, b0, b1, b2, b3) \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk0); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk1); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk2); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk3); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk4); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk5); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk6); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk7); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk8); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk9); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk10); \
+ aes_lastround_4(ed, b0, b1, b2, b3, vk11, vk12);
+
+#define do_aes_4_256(ed, mcimc, b0, b1, b2, b3) \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk0); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk1); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk2); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk3); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk4); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk5); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk6); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk7); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk8); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk9); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk10); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk11); \
+ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk12); \
+ aes_lastround_4(ed, b0, b1, b2, b3, vk13, vk14);
+
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b;
+
+#define aes_clear_keys(nrounds) \
+ cmp nrounds, #12; \
+ CLEAR_REG(vk0); \
+ CLEAR_REG(vk1); \
+ CLEAR_REG(vk2); \
+ CLEAR_REG(vk3); \
+ CLEAR_REG(vk4); \
+ CLEAR_REG(vk5); \
+ CLEAR_REG(vk6); \
+ CLEAR_REG(vk7); \
+ CLEAR_REG(vk9); \
+ CLEAR_REG(vk8); \
+ CLEAR_REG(vk10); \
+ b.lo 1f; \
+ CLEAR_REG(vk11); \
+ CLEAR_REG(vk12); \
+ b.eq 1f; \
+ CLEAR_REG(vk13); \
+ CLEAR_REG(vk14); \
+1: ;
+
+
+/*
+ * unsigned int _gcry_aes_enc_armv8_ce(void *keysched, byte *dst,
+ * const byte *src,
+ * unsigned int nrounds);
+ */
+.align 3
+.globl _gcry_aes_enc_armv8_ce
+ELF(.type _gcry_aes_enc_armv8_ce,%function;)
+_gcry_aes_enc_armv8_ce:
+ /* input:
+ * x0: keysched
+ * x1: dst
+ * x2: src
+ * w3: nrounds
+ */
+ CFI_STARTPROC();
+
+ aes_preload_keys(x0, w3);
+
+ ld1 {v0.16b}, [x2]
+
+ b.hi .Lenc1_256
+ b.eq .Lenc1_192
+
+.Lenc1_128:
+ do_aes_one128(e, mc, v0, v0);
+
+.Lenc1_tail:
+ CLEAR_REG(vk0)
+ CLEAR_REG(vk1)
+ CLEAR_REG(vk2)
+ CLEAR_REG(vk3)
+ CLEAR_REG(vk4)
+ CLEAR_REG(vk5)
+ CLEAR_REG(vk6)
+ CLEAR_REG(vk7)
+ CLEAR_REG(vk8)
+ CLEAR_REG(vk9)
+ CLEAR_REG(vk10)
+ st1 {v0.16b}, [x1]
+ CLEAR_REG(v0)
+
+ mov x0, #0
+ ret
+
+.Lenc1_192:
+ do_aes_one192(e, mc, v0, v0);
+
+ CLEAR_REG(vk11)
+ CLEAR_REG(vk12)
+ b .Lenc1_tail
+
+.Lenc1_256:
+ do_aes_one256(e, mc, v0, v0);
+
+ CLEAR_REG(vk11)
+ CLEAR_REG(vk12)
+ CLEAR_REG(vk13)
+ CLEAR_REG(vk14)
+ b .Lenc1_tail
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce;)
+
+
+/*
+ * unsigned int _gcry_aes_dec_armv8_ce(void *keysched, byte *dst,
+ * const byte *src,
+ * unsigned int nrounds);
+ */
+.align 3
+.globl _gcry_aes_dec_armv8_ce
+ELF(.type _gcry_aes_dec_armv8_ce,%function;)
+_gcry_aes_dec_armv8_ce:
+ /* input:
+ * x0: keysched
+ * x1: dst
+ * x2: src
+ * w3: nrounds
+ */
+ CFI_STARTPROC();
+
+ aes_preload_keys(x0, w3);
+
+ ld1 {v0.16b}, [x2]
+
+ b.hi .Ldec1_256
+ b.eq .Ldec1_192
+
+.Ldec1_128:
+ do_aes_one128(d, imc, v0, v0);
+
+.Ldec1_tail:
+ CLEAR_REG(vk0)
+ CLEAR_REG(vk1)
+ CLEAR_REG(vk2)
+ CLEAR_REG(vk3)
+ CLEAR_REG(vk4)
+ CLEAR_REG(vk5)
+ CLEAR_REG(vk6)
+ CLEAR_REG(vk7)
+ CLEAR_REG(vk8)
+ CLEAR_REG(vk9)
+ CLEAR_REG(vk10)
+ st1 {v0.16b}, [x1]
+ CLEAR_REG(v0)
+
+ mov x0, #0
+ ret
+
+.Ldec1_192:
+ do_aes_one192(d, imc, v0, v0);
+
+ CLEAR_REG(vk11)
+ CLEAR_REG(vk12)
+ b .Ldec1_tail
+
+.Ldec1_256:
+ do_aes_one256(d, imc, v0, v0);
+
+ CLEAR_REG(vk11)
+ CLEAR_REG(vk12)
+ CLEAR_REG(vk13)
+ CLEAR_REG(vk14)
+ b .Ldec1_tail
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_cbc_enc_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *iv, size_t nblocks,
+ * int cbc_mac, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cbc_enc_armv8_ce
+ELF(.type _gcry_aes_cbc_enc_armv8_ce,%function;)
+_gcry_aes_cbc_enc_armv8_ce:
+ /* input:
+ * x0: keysched
+ * x1: outbuf
+ * x2: inbuf
+ * x3: iv
+ * x4: nblocks
+ * w5: cbc_mac
+ * w6: nrounds
+ */
+ CFI_STARTPROC();
+
+ cbz x4, .Lcbc_enc_skip
+
+ cmp w5, #0
+ ld1 {v1.16b}, [x3] /* load IV */
+ cset x5, eq
+
+ aes_preload_keys(x0, w6);
+ lsl x5, x5, #4
+
+ b.eq .Lcbc_enc_loop192
+ b.hi .Lcbc_enc_loop256
+
+#define CBC_ENC(bits) \
+ .Lcbc_enc_loop##bits: \
+ ld1 {v0.16b}, [x2], #16; /* load plaintext */ \
+ eor v1.16b, v0.16b, v1.16b; \
+ sub x4, x4, #1; \
+ \
+ do_aes_one##bits(e, mc, v1, v1); \
+ \
+ st1 {v1.16b}, [x1], x5; /* store ciphertext */ \
+ \
+ cbnz x4, .Lcbc_enc_loop##bits; \
+ b .Lcbc_enc_done;
+
+ CBC_ENC(128)
+ CBC_ENC(192)
+ CBC_ENC(256)
+
+#undef CBC_ENC
+
+.Lcbc_enc_done:
+ aes_clear_keys(w6)
+
+ st1 {v1.16b}, [x3] /* store IV */
+
+ CLEAR_REG(v1)
+ CLEAR_REG(v0)
+
+.Lcbc_enc_skip:
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce;)
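The cmp/cset/lsl sequence above computes the output-pointer stride used by the post-indexed store "st1 {v1.16b}, [x1], x5": 16 bytes per block for plain CBC, 0 for CBC-MAC so every ciphertext block overwrites the previous one and only the final MAC block remains. A minimal C sketch of the same selection (the helper name is illustrative):

/* Sketch of the cset/lsl trick: x5 = (cbc_mac == 0) ? 16 : 0 */
static size_t
cbc_out_stride (int cbc_mac)
{
  return cbc_mac ? 0 : 16;  /* CBC-MAC keeps rewriting one output block */
}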
+
+/*
+ * void _gcry_aes_cbc_dec_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *iv, size_t nblocks,
+ * unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cbc_dec_armv8_ce
+ELF(.type _gcry_aes_cbc_dec_armv8_ce,%function;)
+_gcry_aes_cbc_dec_armv8_ce:
+ /* input:
+ * x0: keysched
+ * x1: outbuf
+ * x2: inbuf
+ * x3: iv
+ * x4: nblocks
+ * w5: nrounds
+ */
+ CFI_STARTPROC();
+
+ cbz x4, .Lcbc_dec_skip
+
+ ld1 {v0.16b}, [x3] /* load IV */
+
+ aes_preload_keys(x0, w5);
+
+ b.eq .Lcbc_dec_entry_192
+ b.hi .Lcbc_dec_entry_256
+
+#define CBC_DEC(bits) \
+ .Lcbc_dec_entry_##bits: \
+ cmp x4, #4; \
+ b.lo .Lcbc_dec_loop_##bits; \
+ \
+ .Lcbc_dec_loop4_##bits: \
+ \
+ ld1 {v1.16b-v4.16b}, [x2], #64; /* load ciphertext */ \
+ sub x4, x4, #4; \
+ mov v5.16b, v1.16b; \
+ mov v6.16b, v2.16b; \
+ mov v7.16b, v3.16b; \
+ mov v16.16b, v4.16b; \
+ cmp x4, #4; \
+ \
+ do_aes_4_##bits(d, imc, v1, v2, v3, v4); \
+ \
+ eor v1.16b, v1.16b, v0.16b; \
+ eor v2.16b, v2.16b, v5.16b; \
+ st1 {v1.16b-v2.16b}, [x1], #32; /* store plaintext */ \
+ eor v3.16b, v3.16b, v6.16b; \
+ eor v4.16b, v4.16b, v7.16b; \
+ mov v0.16b, v16.16b; /* next IV */ \
+ st1 {v3.16b-v4.16b}, [x1], #32; /* store plaintext */ \
+ \
+ b.hs .Lcbc_dec_loop4_##bits; \
+ CLEAR_REG(v3); \
+ CLEAR_REG(v4); \
+ CLEAR_REG(v5); \
+ CLEAR_REG(v6); \
+ CLEAR_REG(v7); \
+ CLEAR_REG(v16); \
+ cbz x4, .Lcbc_dec_done; \
+ \
+ .Lcbc_dec_loop_##bits: \
+ ld1 {v1.16b}, [x2], #16; /* load ciphertext */ \
+ sub x4, x4, #1; \
+ mov v2.16b, v1.16b; \
+ \
+ do_aes_one##bits(d, imc, v1, v1); \
+ \
+ eor v1.16b, v1.16b, v0.16b; \
+ mov v0.16b, v2.16b; \
+ st1 {v1.16b}, [x1], #16; /* store plaintext */ \
+ \
+ cbnz x4, .Lcbc_dec_loop_##bits; \
+ b .Lcbc_dec_done;
+
+ CBC_DEC(128)
+ CBC_DEC(192)
+ CBC_DEC(256)
+
+#undef CBC_DEC
+
+.Lcbc_dec_done:
+ aes_clear_keys(w5)
+
+ st1 {v0.16b}, [x3] /* store IV */
+
+ CLEAR_REG(v0)
+ CLEAR_REG(v1)
+ CLEAR_REG(v2)
+
+.Lcbc_dec_skip:
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce;)
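Unlike CBC encryption, CBC decryption has no serial dependency on the cipher output, which is why the loop above can keep four aesd pipelines busy: each plaintext block only needs the previous ciphertext block, P_i = Dec_K(C_i) xor C_{i-1}. A one-block C sketch of that data flow, reusing the _gcry_aes_dec_armv8_ce declaration from rijndael-armv8-ce.c below (the wrapper itself is illustrative, not part of the library):

#include <string.h>

static void
cbc_dec_one_block_sketch (const void *keysched, unsigned char *out,
                          const unsigned char *in, unsigned char *iv,
                          unsigned int nrounds)
{
  unsigned char c_i[16];
  unsigned int i;

  memcpy (c_i, in, 16);                        /* C_i becomes the next IV */
  _gcry_aes_dec_armv8_ce (keysched, out, in, nrounds);
  for (i = 0; i < 16; i++)
    out[i] ^= iv[i];                           /* P_i = Dec_K(C_i) xor C_{i-1} */
  memcpy (iv, c_i, 16);
}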
+
+
+/*
+ * void _gcry_aes_ctr_enc_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *iv, size_t nblocks,
+ * unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_ctr_enc_armv8_ce
+ELF(.type _gcry_aes_ctr_enc_armv8_ce,%function;)
+_gcry_aes_ctr_enc_armv8_ce:
+ /* input:
+ * x0: keysched
+ * x1: outbuf
+ * x2: inbuf
+ * x3: iv
+ * x4: nblocks
+ * w5: nrounds
+ */
+ CFI_STARTPROC();
+
+ cbz x4, .Lctr_enc_skip
+
+ mov x6, #1
+ movi v16.16b, #0
+ mov v16.D[1], x6
+
+ /* load IV */
+ ldp x9, x10, [x3]
+ ld1 {v0.16b}, [x3]
+ rev x9, x9
+ rev x10, x10
+
+ aes_preload_keys(x0, w5);
+
+ b.eq .Lctr_enc_entry_192
+ b.hi .Lctr_enc_entry_256
+
+#define CTR_ENC(bits) \
+ .Lctr_enc_entry_##bits: \
+ cmp x4, #4; \
+ b.lo .Lctr_enc_loop_##bits; \
+ \
+ .Lctr_enc_loop4_##bits: \
+ cmp x10, #0xfffffffffffffffc; \
+ sub x4, x4, #4; \
+ b.lo .Lctr_enc_loop4_##bits##_nocarry; \
+ \
+ adds x10, x10, #1; \
+ mov v1.16b, v0.16b; \
+ adc x9, x9, xzr; \
+ mov v2.D[1], x10; \
+ mov v2.D[0], x9; \
+ \
+ adds x10, x10, #1; \
+ rev64 v2.16b, v2.16b; \
+ adc x9, x9, xzr; \
+ mov v3.D[1], x10; \
+ mov v3.D[0], x9; \
+ \
+ adds x10, x10, #1; \
+ rev64 v3.16b, v3.16b; \
+ adc x9, x9, xzr; \
+ mov v4.D[1], x10; \
+ mov v4.D[0], x9; \
+ \
+ adds x10, x10, #1; \
+ rev64 v4.16b, v4.16b; \
+ adc x9, x9, xzr; \
+ mov v0.D[1], x10; \
+ mov v0.D[0], x9; \
+ rev64 v0.16b, v0.16b; \
+ \
+ b .Lctr_enc_loop4_##bits##_store_ctr; \
+ \
+ .Lctr_enc_loop4_##bits##_nocarry: \
+ \
+ add v3.2d, v16.2d, v16.2d; /* 2 */ \
+ rev64 v6.16b, v0.16b; \
+ add x10, x10, #4; \
+ add v4.2d, v3.2d, v16.2d; /* 3 */ \
+ add v0.2d, v3.2d, v3.2d; /* 4 */ \
+ rev64 v1.16b, v6.16b; \
+ add v2.2d, v6.2d, v16.2d; \
+ add v3.2d, v6.2d, v3.2d; \
+ add v4.2d, v6.2d, v4.2d; \
+ add v0.2d, v6.2d, v0.2d; \
+ rev64 v2.16b, v2.16b; \
+ rev64 v3.16b, v3.16b; \
+ rev64 v0.16b, v0.16b; \
+ rev64 v4.16b, v4.16b; \
+ \
+ .Lctr_enc_loop4_##bits##_store_ctr: \
+ \
+ st1 {v0.16b}, [x3]; \
+ cmp x4, #4; \
+ ld1 {v5.16b-v7.16b}, [x2], #48; /* preload ciphertext */ \
+ \
+ do_aes_4_##bits(e, mc, v1, v2, v3, v4); \
+ \
+ eor v1.16b, v1.16b, v5.16b; \
+ ld1 {v5.16b}, [x2], #16; /* load ciphertext */ \
+ eor v2.16b, v2.16b, v6.16b; \
+ eor v3.16b, v3.16b, v7.16b; \
+ eor v4.16b, v4.16b, v5.16b; \
+ st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \
+ \
+ b.hs .Lctr_enc_loop4_##bits; \
+ CLEAR_REG(v3); \
+ CLEAR_REG(v4); \
+ CLEAR_REG(v5); \
+ CLEAR_REG(v6); \
+ CLEAR_REG(v7); \
+ cbz x4, .Lctr_enc_done; \
+ \
+ .Lctr_enc_loop_##bits: \
+ \
+ adds x10, x10, #1; \
+ mov v1.16b, v0.16b; \
+ adc x9, x9, xzr; \
+ mov v0.D[1], x10; \
+ mov v0.D[0], x9; \
+ sub x4, x4, #1; \
+ ld1 {v2.16b}, [x2], #16; /* load ciphertext */ \
+ rev64 v0.16b, v0.16b; \
+ \
+ do_aes_one##bits(e, mc, v1, v1); \
+ \
+ eor v1.16b, v2.16b, v1.16b; \
+ st1 {v1.16b}, [x1], #16; /* store plaintext */ \
+ \
+ cbnz x4, .Lctr_enc_loop_##bits; \
+ b .Lctr_enc_done;
+
+ CTR_ENC(128)
+ CTR_ENC(192)
+ CTR_ENC(256)
+
+#undef CTR_ENC
+
+.Lctr_enc_done:
+ aes_clear_keys(w5)
+
+ st1 {v0.16b}, [x3] /* store IV */
+
+ CLEAR_REG(v0)
+ CLEAR_REG(v1)
+ CLEAR_REG(v2)
+
+.Lctr_enc_skip:
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce;)
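The ldp/rev pair above keeps the 128-bit counter in x9 (high half) and x10 (low half) as native integers so it can be bumped with adds/adc, and rev64 converts each new value back into the big-endian block layout the cipher consumes. A byte-wise C sketch of the per-block increment, without the register-level carry handling:

#include <stdint.h>

/* Sketch: increment a 16-byte big-endian counter block. */
static void
ctr_increment_be (uint8_t ctr[16])
{
  int i;

  for (i = 15; i >= 0; i--)
    if (++ctr[i])        /* stop as soon as a byte did not wrap to zero */
      break;
}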
+
+
+/*
+ * void _gcry_aes_cfb_enc_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *iv, size_t nblocks,
+ * unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cfb_enc_armv8_ce
+ELF(.type _gcry_aes_cfb_enc_armv8_ce,%function;)
+_gcry_aes_cfb_enc_armv8_ce:
+ /* input:
+ * x0: keysched
+ * x1: outbuf
+ * x2: inbuf
+ * x3: iv
+ * x4: nblocks
+ * w5: nrounds
+ */
+ CFI_STARTPROC();
+
+ cbz x4, .Lcfb_enc_skip
+
+ /* load IV */
+ ld1 {v0.16b}, [x3]
+
+ aes_preload_keys(x0, w5);
+
+ b.eq .Lcfb_enc_entry_192
+ b.hi .Lcfb_enc_entry_256
+
+#define CFB_ENC(bits) \
+ .Lcfb_enc_entry_##bits: \
+ .Lcfb_enc_loop_##bits: \
+ ld1 {v1.16b}, [x2], #16; /* load plaintext */ \
+ sub x4, x4, #1; \
+ \
+ do_aes_one##bits(e, mc, v0, v0); \
+ \
+ eor v0.16b, v1.16b, v0.16b; \
+ st1 {v0.16b}, [x1], #16; /* store ciphertext */ \
+ \
+ cbnz x4, .Lcfb_enc_loop_##bits; \
+ b .Lcfb_enc_done;
+
+ CFB_ENC(128)
+ CFB_ENC(192)
+ CFB_ENC(256)
+
+#undef CFB_ENC
+
+.Lcfb_enc_done:
+ aes_clear_keys(w5)
+
+ st1 {v0.16b}, [x3] /* store IV */
+
+ CLEAR_REG(v0)
+ CLEAR_REG(v1)
+
+.Lcfb_enc_skip:
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_cfb_dec_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *iv, size_t nblocks,
+ * unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cfb_dec_armv8_ce
+ELF(.type _gcry_aes_cfb_dec_armv8_ce,%function;)
+_gcry_aes_cfb_dec_armv8_ce:
+ /* input:
+ * x0: keysched
+ * x1: outbuf
+ * x2: inbuf
+ * x3: iv
+ * x4: nblocks
+ * w5: nrounds
+ */
+ CFI_STARTPROC();
+
+ cbz x4, .Lcfb_dec_skip
+
+ /* load IV */
+ ld1 {v0.16b}, [x3]
+
+ aes_preload_keys(x0, w5);
+
+ b.eq .Lcfb_dec_entry_192
+ b.hi .Lcfb_dec_entry_256
+
+#define CFB_DEC(bits) \
+ .Lcfb_dec_entry_##bits: \
+ cmp x4, #4; \
+ b.lo .Lcfb_dec_loop_##bits; \
+ \
+ .Lcfb_dec_loop4_##bits: \
+ \
+ ld1 {v2.16b-v4.16b}, [x2], #48; /* load ciphertext */ \
+ mov v1.16b, v0.16b; \
+ sub x4, x4, #4; \
+ cmp x4, #4; \
+ mov v5.16b, v2.16b; \
+ mov v6.16b, v3.16b; \
+ mov v7.16b, v4.16b; \
+ ld1 {v0.16b}, [x2], #16; /* load next IV / ciphertext */ \
+ \
+ do_aes_4_##bits(e, mc, v1, v2, v3, v4); \
+ \
+ eor v1.16b, v1.16b, v5.16b; \
+ eor v2.16b, v2.16b, v6.16b; \
+ eor v3.16b, v3.16b, v7.16b; \
+ eor v4.16b, v4.16b, v0.16b; \
+ st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \
+ \
+ b.hs .Lcfb_dec_loop4_##bits; \
+ CLEAR_REG(v3); \
+ CLEAR_REG(v4); \
+ CLEAR_REG(v5); \
+ CLEAR_REG(v6); \
+ CLEAR_REG(v7); \
+ cbz x4, .Lcfb_dec_done; \
+ \
+ .Lcfb_dec_loop_##bits: \
+ \
+ ld1 {v1.16b}, [x2], #16; /* load ciphertext */ \
+ \
+ sub x4, x4, #1; \
+ \
+ do_aes_one##bits(e, mc, v0, v0); \
+ \
+ eor v2.16b, v1.16b, v0.16b; \
+ mov v0.16b, v1.16b; \
+ st1 {v2.16b}, [x1], #16; /* store plaintext */ \
+ \
+ cbnz x4, .Lcfb_dec_loop_##bits; \
+ b .Lcfb_dec_done;
+
+ CFB_DEC(128)
+ CFB_DEC(192)
+ CFB_DEC(256)
+
+#undef CFB_DEC
+
+.Lcfb_dec_done:
+ aes_clear_keys(w5)
+
+ st1 {v0.16b}, [x3] /* store IV */
+
+ CLEAR_REG(v0)
+ CLEAR_REG(v1)
+ CLEAR_REG(v2)
+
+.Lcfb_dec_skip:
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_ocb_enc_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *offset,
+ * unsigned char *checksum,
+ * unsigned char *L_table,
+ * size_t nblocks,
+ * unsigned int nrounds,
+ * unsigned int blkn);
+ */
+
+.align 3
+.globl _gcry_aes_ocb_enc_armv8_ce
+ELF(.type _gcry_aes_ocb_enc_armv8_ce,%function;)
+_gcry_aes_ocb_enc_armv8_ce:
+ /* input:
+ * x0: keysched
+ * x1: outbuf
+ * x2: inbuf
+ * x3: offset
+ * x4: checksum
+ * x5: Ltable
+ * x6: nblocks (0 < nblocks <= 32)
+ * w7: nrounds
+ * %st+0: blkn => w12
+ */
+ CFI_STARTPROC();
+
+ ldr w12, [sp]
+ ld1 {v0.16b}, [x3] /* load offset */
+ ld1 {v16.16b}, [x4] /* load checksum */
+
+ aes_preload_keys(x0, w7);
+
+ b.eq .Locb_enc_entry_192
+ b.hi .Locb_enc_entry_256
+
+#define OCB_ENC(bits, ...) \
+ .Locb_enc_entry_##bits: \
+ cmp x6, #4; \
+ add x12, x12, #1; \
+ b.lo .Locb_enc_loop_##bits; \
+ \
+ .Locb_enc_loop4_##bits: \
+ \
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+ /* Checksum_i = Checksum_{i-1} xor P_i */ \
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ \
+ \
+ add w9, w12, #1; \
+ add w10, w12, #2; \
+ add w11, w12, #3; \
+ rbit w8, w12; \
+ add w12, w12, #4; \
+ rbit w9, w9; \
+ rbit w10, w10; \
+ rbit w11, w11; \
+ clz w8, w8; /* ntz(i+0) */ \
+ clz w9, w9; /* ntz(i+1) */ \
+ clz w10, w10; /* ntz(i+2) */ \
+ clz w11, w11; /* ntz(i+3) */ \
+ add x8, x5, x8, lsl #4; \
+ ld1 {v1.16b-v4.16b}, [x2], #64; /* load P_i+<0-3> */ \
+ add x9, x5, x9, lsl #4; \
+ add x10, x5, x10, lsl #4; \
+ add x11, x5, x11, lsl #4; \
+ \
+ sub x6, x6, #4; \
+ \
+ ld1 {v5.16b}, [x8]; /* load L_{ntz(i+0)} */ \
+ eor v16.16b, v16.16b, v1.16b; /* Checksum_i+0 */ \
+ ld1 {v6.16b}, [x9]; /* load L_{ntz(i+1)} */ \
+ eor v16.16b, v16.16b, v2.16b; /* Checksum_i+1 */ \
+ ld1 {v7.16b}, [x10]; /* load L_{ntz(i+2)} */ \
+ eor v16.16b, v16.16b, v3.16b; /* Checksum_i+2 */ \
+ eor v5.16b, v5.16b, v0.16b; /* Offset_i+0 */ \
+ ld1 {v0.16b}, [x11]; /* load L_{ntz(i+3)} */ \
+ eor v16.16b, v16.16b, v4.16b; /* Checksum_i+3 */ \
+ eor v6.16b, v6.16b, v5.16b; /* Offset_i+1 */ \
+ eor v1.16b, v1.16b, v5.16b; /* P_i+0 xor Offset_i+0 */ \
+ eor v7.16b, v7.16b, v6.16b; /* Offset_i+2 */ \
+ eor v2.16b, v2.16b, v6.16b; /* P_i+1 xor Offset_i+1 */ \
+ eor v0.16b, v0.16b, v7.16b; /* Offset_i+3 */ \
+ cmp x6, #4; \
+ eor v3.16b, v3.16b, v7.16b; /* P_i+2 xor Offset_i+2 */ \
+ eor v4.16b, v4.16b, v0.16b; /* P_i+3 xor Offset_i+3 */ \
+ \
+ do_aes_4_##bits(e, mc, v1, v2, v3, v4); \
+ \
+ eor v1.16b, v1.16b, v5.16b; /* xor Offset_i+0 */ \
+ eor v2.16b, v2.16b, v6.16b; /* xor Offset_i+1 */ \
+ eor v3.16b, v3.16b, v7.16b; /* xor Offset_i+2 */ \
+ eor v4.16b, v4.16b, v0.16b; /* xor Offset_i+3 */ \
+ st1 {v1.16b-v4.16b}, [x1], #64; \
+ \
+ b.hs .Locb_enc_loop4_##bits; \
+ CLEAR_REG(v3); \
+ CLEAR_REG(v4); \
+ CLEAR_REG(v5); \
+ CLEAR_REG(v6); \
+ CLEAR_REG(v7); \
+ cbz x6, .Locb_enc_done; \
+ \
+ .Locb_enc_loop_##bits: \
+ \
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+ /* Checksum_i = Checksum_{i-1} xor P_i */ \
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ \
+ \
+ rbit x8, x12; \
+ add x12, x12, #1; \
+ clz x8, x8; /* ntz(i) */ \
+ add x8, x5, x8, lsl #4; \
+ \
+ ld1 {v1.16b}, [x2], #16; /* load plaintext */ \
+ ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \
+ sub x6, x6, #1; \
+ eor v0.16b, v0.16b, v2.16b; \
+ eor v16.16b, v16.16b, v1.16b; \
+ eor v1.16b, v1.16b, v0.16b; \
+ \
+ do_aes_one##bits(e, mc, v1, v1); \
+ \
+ eor v1.16b, v1.16b, v0.16b; \
+ st1 {v1.16b}, [x1], #16; /* store ciphertext */ \
+ \
+ cbnz x6, .Locb_enc_loop_##bits; \
+ b .Locb_enc_done;
+
+ OCB_ENC(128)
+ OCB_ENC(192)
+ OCB_ENC(256)
+
+#undef OCB_ENC
+
+.Locb_enc_done:
+ aes_clear_keys(w7)
+
+ st1 {v16.16b}, [x4] /* store checksum */
+ st1 {v0.16b}, [x3] /* store offset */
+
+ CLEAR_REG(v0)
+ CLEAR_REG(v1)
+ CLEAR_REG(v2)
+ CLEAR_REG(v16)
+
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce;)
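The rbit/clz pairs above compute ntz(i), the number of trailing zero bits of the one-based block index, which picks the L-table entry for the OCB offset chain Offset_i = Offset_{i-1} xor L_{ntz(i)}. A C sketch of the same index math (GCC builtin assumed; clz of the bit-reversed value equals the trailing-zero count):

#include <stdint.h>
#include <stddef.h>

/* Sketch: L-table entry for block number blkn (blkn > 0), 16 bytes per entry,
 * matching the "add x8, x5, x8, lsl #4" addressing above. */
static const unsigned char *
ocb_L_entry (const unsigned char *L_table, uint32_t blkn)
{
  unsigned int ntz = (unsigned int)__builtin_ctz (blkn);
  return L_table + ((size_t)ntz << 4);
}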
+
+
+/*
+ * void _gcry_aes_ocb_dec_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *offset,
+ * unsigned char *checksum,
+ * unsigned char *L_table,
+ * size_t nblocks,
+ * unsigned int nrounds,
+ * unsigned int blkn);
+ */
+
+.align 3
+.globl _gcry_aes_ocb_dec_armv8_ce
+ELF(.type _gcry_aes_ocb_dec_armv8_ce,%function;)
+_gcry_aes_ocb_dec_armv8_ce:
+ /* input:
+ * x0: keysched
+ * x1: outbuf
+ * x2: inbuf
+ * x3: offset
+ * x4: checksum
+ * x5: Ltable
+ * x6: nblocks (0 < nblocks <= 32)
+ * w7: nrounds
+ * %st+0: blkn => w12
+ */
+ CFI_STARTPROC();
+
+ ldr w12, [sp]
+ ld1 {v0.16b}, [x3] /* load offset */
+ ld1 {v16.16b}, [x4] /* load checksum */
+
+ aes_preload_keys(x0, w7);
+
+ b.eq .Locb_dec_entry_192
+ b.hi .Locb_dec_entry_256
+
+#define OCB_DEC(bits) \
+ .Locb_dec_entry_##bits: \
+ cmp x6, #4; \
+ add w12, w12, #1; \
+ b.lo .Locb_dec_loop_##bits; \
+ \
+ .Locb_dec_loop4_##bits: \
+ \
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ \
+ /* Checksum_i = Checksum_{i-1} xor P_i */ \
+ \
+ add w9, w12, #1; \
+ add w10, w12, #2; \
+ add w11, w12, #3; \
+ rbit w8, w12; \
+ add w12, w12, #4; \
+ rbit w9, w9; \
+ rbit w10, w10; \
+ rbit w11, w11; \
+ clz w8, w8; /* ntz(i+0) */ \
+ clz w9, w9; /* ntz(i+1) */ \
+ clz w10, w10; /* ntz(i+2) */ \
+ clz w11, w11; /* ntz(i+3) */ \
+ add x8, x5, x8, lsl #4; \
+ ld1 {v1.16b-v4.16b}, [x2], #64; /* load C_i+<0-3> */ \
+ add x9, x5, x9, lsl #4; \
+ add x10, x5, x10, lsl #4; \
+ add x11, x5, x11, lsl #4; \
+ \
+ sub x6, x6, #4; \
+ \
+ ld1 {v5.16b}, [x8]; /* load L_{ntz(i+0)} */ \
+ ld1 {v6.16b}, [x9]; /* load L_{ntz(i+1)} */ \
+ ld1 {v7.16b}, [x10]; /* load L_{ntz(i+2)} */ \
+ eor v5.16b, v5.16b, v0.16b; /* Offset_i+0 */ \
+ ld1 {v0.16b}, [x11]; /* load L_{ntz(i+3)} */ \
+ eor v6.16b, v6.16b, v5.16b; /* Offset_i+1 */ \
+ eor v1.16b, v1.16b, v5.16b; /* C_i+0 xor Offset_i+0 */ \
+ eor v7.16b, v7.16b, v6.16b; /* Offset_i+2 */ \
+ eor v2.16b, v2.16b, v6.16b; /* C_i+1 xor Offset_i+1 */ \
+ eor v0.16b, v0.16b, v7.16b; /* Offset_i+3 */ \
+ cmp x6, #4; \
+ eor v3.16b, v3.16b, v7.16b; /* C_i+2 xor Offset_i+2 */ \
+ eor v4.16b, v4.16b, v0.16b; /* C_i+3 xor Offset_i+3 */ \
+ \
+ do_aes_4_##bits(d, imc, v1, v2, v3, v4); \
+ \
+ eor v1.16b, v1.16b, v5.16b; /* xor Offset_i+0 */ \
+ eor v2.16b, v2.16b, v6.16b; /* xor Offset_i+1 */ \
+ eor v16.16b, v16.16b, v1.16b; /* Checksum_i+0 */ \
+ eor v3.16b, v3.16b, v7.16b; /* xor Offset_i+2 */ \
+ eor v16.16b, v16.16b, v2.16b; /* Checksum_i+1 */ \
+ eor v4.16b, v4.16b, v0.16b; /* xor Offset_i+3 */ \
+ eor v16.16b, v16.16b, v3.16b; /* Checksum_i+2 */ \
+ eor v16.16b, v16.16b, v4.16b; /* Checksum_i+3 */ \
+ st1 {v1.16b-v4.16b}, [x1], #64; \
+ \
+ b.hs .Locb_dec_loop4_##bits; \
+ CLEAR_REG(v3); \
+ CLEAR_REG(v4); \
+ CLEAR_REG(v5); \
+ CLEAR_REG(v6); \
+ CLEAR_REG(v7); \
+ cbz x6, .Locb_dec_done; \
+ \
+ .Locb_dec_loop_##bits: \
+ \
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ \
+ /* Checksum_i = Checksum_{i-1} xor P_i */ \
+ \
+ rbit w8, w12; \
+ add w12, w12, #1; \
+ clz w8, w8; /* ntz(i) */ \
+ add x8, x5, x8, lsl #4; \
+ \
+ ld1 {v1.16b}, [x2], #16; /* load ciphertext */ \
+ ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \
+ sub x6, x6, #1; \
+ eor v0.16b, v0.16b, v2.16b; \
+ eor v1.16b, v1.16b, v0.16b; \
+ \
+ do_aes_one##bits(d, imc, v1, v1) \
+ \
+ eor v1.16b, v1.16b, v0.16b; \
+ st1 {v1.16b}, [x1], #16; /* store plaintext */ \
+ eor v16.16b, v16.16b, v1.16b; \
+ \
+ cbnz x6, .Locb_dec_loop_##bits; \
+ b .Locb_dec_done;
+
+ OCB_DEC(128)
+ OCB_DEC(192)
+ OCB_DEC(256)
+
+#undef OCB_DEC
+
+.Locb_dec_done:
+ aes_clear_keys(w7)
+
+ st1 {v16.16b}, [x4] /* store checksum */
+ st1 {v0.16b}, [x3] /* store offset */
+
+ CLEAR_REG(v0)
+ CLEAR_REG(v1)
+ CLEAR_REG(v2)
+ CLEAR_REG(v16)
+
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_ocb_auth_armv8_ce (const void *keysched,
+ * const unsigned char *abuf,
+ * unsigned char *offset,
+ * unsigned char *checksum,
+ * unsigned char *L_table,
+ * size_t nblocks,
+ * unsigned int nrounds,
+ * unsigned int blkn);
+ */
+
+.align 3
+.globl _gcry_aes_ocb_auth_armv8_ce
+ELF(.type _gcry_aes_ocb_auth_armv8_ce,%function;)
+_gcry_aes_ocb_auth_armv8_ce:
+ /* input:
+ * x0: keysched
+ * x1: abuf
+ * x2: offset => x3
+ * x3: checksum => x4
+ * x4: Ltable => x5
+ * x5: nblocks => x6 (0 < nblocks <= 32)
+ * w6: nrounds => w7
+ * w7: blkn => w12
+ */
+ CFI_STARTPROC();
+
+ mov w12, w7
+ mov w7, w6
+ mov x6, x5
+ mov x5, x4
+ mov x4, x3
+ mov x3, x2
+
+ aes_preload_keys(x0, w7);
+
+ ld1 {v0.16b}, [x3] /* load offset */
+ ld1 {v16.16b}, [x4] /* load checksum */
+
+ b.eq .Locb_auth_entry_192
+ b.hi .Locb_auth_entry_256
+
+#define OCB_AUTH(bits) \
+ .Locb_auth_entry_##bits: \
+ cmp x6, #4; \
+ add w12, w12, #1; \
+ b.lo .Locb_auth_loop_##bits; \
+ \
+ .Locb_auth_loop4_##bits: \
+ \
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ \
+ \
+ add w9, w12, #1; \
+ add w10, w12, #2; \
+ add w11, w12, #3; \
+ rbit w8, w12; \
+ add w12, w12, #4; \
+ rbit w9, w9; \
+ rbit w10, w10; \
+ rbit w11, w11; \
+ clz w8, w8; /* ntz(i+0) */ \
+ clz w9, w9; /* ntz(i+1) */ \
+ clz w10, w10; /* ntz(i+2) */ \
+ clz w11, w11; /* ntz(i+3) */ \
+ add x8, x5, x8, lsl #4; \
+ ld1 {v1.16b-v4.16b}, [x1], #64; /* load A_i+<0-3> */ \
+ add x9, x5, x9, lsl #4; \
+ add x10, x5, x10, lsl #4; \
+ add x11, x5, x11, lsl #4; \
+ \
+ sub x6, x6, #4; \
+ \
+ ld1 {v5.16b}, [x8]; /* load L_{ntz(i+0)} */ \
+ ld1 {v6.16b}, [x9]; /* load L_{ntz(i+1)} */ \
+ ld1 {v7.16b}, [x10]; /* load L_{ntz(i+2)} */ \
+ eor v5.16b, v5.16b, v0.16b; /* Offset_i+0 */ \
+ ld1 {v0.16b}, [x11]; /* load L_{ntz(i+3)} */ \
+ eor v6.16b, v6.16b, v5.16b; /* Offset_i+1 */ \
+ eor v1.16b, v1.16b, v5.16b; /* A_i+0 xor Offset_i+0 */ \
+ eor v7.16b, v7.16b, v6.16b; /* Offset_i+2 */ \
+ eor v2.16b, v2.16b, v6.16b; /* A_i+1 xor Offset_i+1 */ \
+ eor v0.16b, v0.16b, v7.16b; /* Offset_i+3 */ \
+ cmp x6, #4; \
+ eor v3.16b, v3.16b, v7.16b; /* A_i+2 xor Offset_i+2 */ \
+ eor v4.16b, v4.16b, v0.16b; /* A_i+3 xor Offset_i+3 */ \
+ \
+ do_aes_4_##bits(e, mc, v1, v2, v3, v4); \
+ \
+ eor v1.16b, v1.16b, v2.16b; \
+ eor v16.16b, v16.16b, v3.16b; \
+ eor v1.16b, v1.16b, v4.16b; \
+ eor v16.16b, v16.16b, v1.16b; \
+ \
+ b.hs .Locb_auth_loop4_##bits; \
+ CLEAR_REG(v3); \
+ CLEAR_REG(v4); \
+ CLEAR_REG(v5); \
+ CLEAR_REG(v6); \
+ CLEAR_REG(v7); \
+ cbz x6, .Locb_auth_done; \
+ \
+ .Locb_auth_loop_##bits: \
+ \
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ \
+ \
+ rbit w8, w12; \
+ add w12, w12, #1; \
+ clz w8, w8; /* ntz(i) */ \
+ add x8, x5, x8, lsl #4; \
+ \
+ ld1 {v1.16b}, [x1], #16; /* load aadtext */ \
+ ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \
+ sub x6, x6, #1; \
+ eor v0.16b, v0.16b, v2.16b; \
+ eor v1.16b, v1.16b, v0.16b; \
+ \
+ do_aes_one##bits(e, mc, v1, v1) \
+ \
+ eor v16.16b, v16.16b, v1.16b; \
+ \
+ cbnz x6, .Locb_auth_loop_##bits; \
+ b .Locb_auth_done;
+
+ OCB_AUTH(128)
+ OCB_AUTH(192)
+ OCB_AUTH(256)
+
+#undef OCB_AUTH
+
+.Locb_auth_done:
+ aes_clear_keys(w7)
+
+ st1 {v16.16b}, [x4] /* store checksum */
+ st1 {v0.16b}, [x3] /* store offset */
+
+ CLEAR_REG(v0)
+ CLEAR_REG(v1)
+ CLEAR_REG(v2)
+ CLEAR_REG(v16)
+
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_xts_enc_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *tweak,
+ * size_t nblocks,
+ * unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_xts_enc_armv8_ce
+ELF(.type _gcry_aes_xts_enc_armv8_ce,%function;)
+_gcry_aes_xts_enc_armv8_ce:
+ /* input:
+ * x0: keysched
+ * x1: outbuf
+ * x2: inbuf
+ * x3: tweak
+ * x4: nblocks
+ * w5: nrounds
+ */
+ CFI_STARTPROC();
+
+ cbz x4, .Lxts_enc_skip
+
+ /* load tweak */
+ ld1 {v0.16b}, [x3]
+
+ /* load gfmul mask */
+ mov x6, #0x87
+ mov x7, #0x01
+ mov v16.D[0], x6
+ mov v16.D[1], x7
+
+ aes_preload_keys(x0, w5);
+
+ b.eq .Lxts_enc_entry_192
+ b.hi .Lxts_enc_entry_256
+
+#define XTS_ENC(bits) \
+ .Lxts_enc_entry_##bits: \
+ cmp x4, #4; \
+ b.lo .Lxts_enc_loop_##bits; \
+ \
+ .Lxts_enc_loop4_##bits: \
+ \
+ ext v4.16b, v0.16b, v0.16b, #8; \
+ \
+ sshr v2.2d, v4.2d, #63; \
+ add v5.2d, v0.2d, v0.2d; \
+ and v2.16b, v2.16b, v16.16b; \
+ add v4.2d, v4.2d, v4.2d; \
+ eor v5.16b, v5.16b, v2.16b; \
+ \
+ sshr v2.2d, v4.2d, #63; \
+ add v6.2d, v5.2d, v5.2d; \
+ and v2.16b, v2.16b, v16.16b; \
+ add v4.2d, v4.2d, v4.2d; \
+ eor v6.16b, v6.16b, v2.16b; \
+ \
+ sshr v2.2d, v4.2d, #63; \
+ add v7.2d, v6.2d, v6.2d; \
+ and v2.16b, v2.16b, v16.16b; \
+ add v4.2d, v4.2d, v4.2d; \
+ eor v7.16b, v7.16b, v2.16b; \
+ \
+ sshr v2.2d, v4.2d, #63; \
+ add v3.2d, v7.2d, v7.2d; \
+ and v2.16b, v2.16b, v16.16b; \
+ add v4.2d, v4.2d, v4.2d; \
+ eor v3.16b, v3.16b, v2.16b; \
+ ld1 {v1.16b-v2.16b}, [x2], #32; /* load plaintext */ \
+ st1 {v3.16b}, [x3]; \
+ sub x4, x4, #4; \
+ eor v1.16b, v1.16b, v0.16b; \
+ \
+ ld1 {v3.16b-v4.16b}, [x2], #32; /* load plaintext */ \
+ cmp x4, #4; \
+ eor v2.16b, v2.16b, v5.16b; \
+ eor v3.16b, v3.16b, v6.16b; \
+ eor v4.16b, v4.16b, v7.16b; \
+ \
+ do_aes_4_##bits(e, mc, v1, v2, v3, v4); \
+ \
+ eor v1.16b, v1.16b, v0.16b; \
+ ld1 {v0.16b}, [x3]; \
+ eor v2.16b, v2.16b, v5.16b; \
+ eor v3.16b, v3.16b, v6.16b; \
+ eor v4.16b, v4.16b, v7.16b; \
+ st1 {v1.16b-v4.16b}, [x1], #64; /* store ciphertext */ \
+ \
+ b.hs .Lxts_enc_loop4_##bits; \
+ CLEAR_REG(v3); \
+ CLEAR_REG(v4); \
+ CLEAR_REG(v5); \
+ CLEAR_REG(v6); \
+ CLEAR_REG(v7); \
+ cbz x4, .Lxts_enc_done; \
+ \
+ .Lxts_enc_loop_##bits: \
+ \
+ ld1 {v1.16b}, [x2], #16; /* load plaintext */ \
+ ext v3.16b, v0.16b, v0.16b, #8; \
+ mov v2.16b, v0.16b; \
+ sshr v3.2d, v3.2d, #63; \
+ add v0.2d, v0.2d, v0.2d; \
+ and v3.16b, v3.16b, v16.16b; \
+ eor v1.16b, v1.16b, v2.16b; \
+ eor v0.16b, v0.16b, v3.16b; \
+ sub x4, x4, #1; \
+ \
+ do_aes_one##bits(e, mc, v1, v1); \
+ \
+ eor v1.16b, v1.16b, v2.16b; \
+ st1 {v1.16b}, [x1], #16; /* store ciphertext */ \
+ \
+ cbnz x4, .Lxts_enc_loop_##bits; \
+ b .Lxts_enc_done;
+
+ XTS_ENC(128)
+ XTS_ENC(192)
+ XTS_ENC(256)
+
+#undef XTS_ENC
+
+.Lxts_enc_done:
+ aes_clear_keys(w5)
+
+ st1 {v0.16b}, [x3] /* store tweak */
+
+ CLEAR_REG(v0)
+ CLEAR_REG(v1)
+ CLEAR_REG(v2)
+
+.Lxts_enc_skip:
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;)
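Each sshr/and/add/eor group above multiplies the current tweak by x in GF(2^128): the 128-bit value is doubled lane-wise and the bit shifted out of the top is folded back in through the reduction constant 0x87 held in v16 (polynomial x^128 + x^7 + x^2 + x + 1). A C sketch of one doubling step on two little-endian 64-bit halves:

#include <stdint.h>

/* Sketch: tweak <- tweak * x in GF(2^128), XTS reduction polynomial. */
static void
xts_mul_x (uint64_t *lo, uint64_t *hi)
{
  uint64_t carry = *hi >> 63;           /* bit 127, shifted out by the doubling */

  *hi = (*hi << 1) | (*lo >> 63);
  *lo = (*lo << 1) ^ (carry * 0x87);    /* fold the carry back in */
}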
+
+
+/*
+ * void _gcry_aes_xts_dec_armv8_ce (const void *keysched,
+ * unsigned char *outbuf,
+ * const unsigned char *inbuf,
+ * unsigned char *tweak,
+ * size_t nblocks,
+ * unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_xts_dec_armv8_ce
+ELF(.type _gcry_aes_xts_dec_armv8_ce,%function;)
+_gcry_aes_xts_dec_armv8_ce:
+ /* input:
+ * x0: keysched
+ * x1: outbuf
+ * x2: inbuf
+ * x3: tweak
+ * x4: nblocks
+ * w5: nrounds
+ */
+ CFI_STARTPROC();
+
+ cbz x4, .Lxts_dec_skip
+
+ /* load tweak */
+ ld1 {v0.16b}, [x3]
+
+ /* load gfmul mask */
+ mov x6, #0x87
+ mov x7, #0x01
+ mov v16.D[0], x6
+ mov v16.D[1], x7
+
+ aes_preload_keys(x0, w5);
+
+ b.eq .Lxts_dec_entry_192
+ b.hi .Lxts_dec_entry_256
+
+#define XTS_DEC(bits) \
+ .Lxts_dec_entry_##bits: \
+ cmp x4, #4; \
+ b.lo .Lxts_dec_loop_##bits; \
+ \
+ .Lxts_dec_loop4_##bits: \
+ \
+ ext v4.16b, v0.16b, v0.16b, #8; \
+ \
+ sshr v2.2d, v4.2d, #63; \
+ add v5.2d, v0.2d, v0.2d; \
+ and v2.16b, v2.16b, v16.16b; \
+ add v4.2d, v4.2d, v4.2d; \
+ eor v5.16b, v5.16b, v2.16b; \
+ \
+ sshr v2.2d, v4.2d, #63; \
+ add v6.2d, v5.2d, v5.2d; \
+ and v2.16b, v2.16b, v16.16b; \
+ add v4.2d, v4.2d, v4.2d; \
+ eor v6.16b, v6.16b, v2.16b; \
+ \
+ sshr v2.2d, v4.2d, #63; \
+ add v7.2d, v6.2d, v6.2d; \
+ and v2.16b, v2.16b, v16.16b; \
+ add v4.2d, v4.2d, v4.2d; \
+ eor v7.16b, v7.16b, v2.16b; \
+ \
+ sshr v2.2d, v4.2d, #63; \
+ add v3.2d, v7.2d, v7.2d; \
+ and v2.16b, v2.16b, v16.16b; \
+ add v4.2d, v4.2d, v4.2d; \
+ eor v3.16b, v3.16b, v2.16b; \
+ ld1 {v1.16b-v2.16b}, [x2], #32; /* load ciphertext */ \
+ st1 {v3.16b}, [x3]; \
+ sub x4, x4, #4; \
+ eor v1.16b, v1.16b, v0.16b; \
+ \
+ ld1 {v3.16b-v4.16b}, [x2], #32; /* load ciphertext */ \
+ cmp x4, #4; \
+ eor v2.16b, v2.16b, v5.16b; \
+ eor v3.16b, v3.16b, v6.16b; \
+ eor v4.16b, v4.16b, v7.16b; \
+ \
+ do_aes_4_##bits(d, imc, v1, v2, v3, v4); \
+ \
+ eor v1.16b, v1.16b, v0.16b; \
+ ld1 {v0.16b}, [x3]; \
+ eor v2.16b, v2.16b, v5.16b; \
+ eor v3.16b, v3.16b, v6.16b; \
+ eor v4.16b, v4.16b, v7.16b; \
+ st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \
+ \
+ b.hs .Lxts_dec_loop4_##bits; \
+ CLEAR_REG(v3); \
+ CLEAR_REG(v4); \
+ CLEAR_REG(v5); \
+ CLEAR_REG(v6); \
+ CLEAR_REG(v7); \
+ cbz x4, .Lxts_dec_done; \
+ \
+ .Lxts_dec_loop_##bits: \
+ \
+ ld1 {v1.16b}, [x2], #16; /* load ciphertext */ \
+ ext v3.16b, v0.16b, v0.16b, #8; \
+ mov v2.16b, v0.16b; \
+ sshr v3.2d, v3.2d, #63; \
+ add v0.2d, v0.2d, v0.2d; \
+ and v3.16b, v3.16b, v16.16b; \
+ eor v1.16b, v1.16b, v2.16b; \
+ eor v0.16b, v0.16b, v3.16b; \
+ sub x4, x4, #1; \
+ \
+ do_aes_one##bits(d, imc, v1, v1); \
+ \
+ eor v1.16b, v1.16b, v2.16b; \
+ st1 {v1.16b}, [x1], #16; /* store plaintext */ \
+ \
+ cbnz x4, .Lxts_dec_loop_##bits; \
+ b .Lxts_dec_done;
+
+ XTS_DEC(128)
+ XTS_DEC(192)
+ XTS_DEC(256)
+
+#undef XTS_DEC
+
+.Lxts_dec_done:
+ aes_clear_keys(w5)
+
+ st1 {v0.16b}, [x3] /* store tweak */
+
+ CLEAR_REG(v0)
+ CLEAR_REG(v1)
+ CLEAR_REG(v2)
+
+.Lxts_dec_skip:
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;)
+
+
+/*
+ * u32 _gcry_aes_sbox4_armv8_ce(u32 in4b);
+ */
+.align 3
+.globl _gcry_aes_sbox4_armv8_ce
+ELF(.type _gcry_aes_sbox4_armv8_ce,%function;)
+_gcry_aes_sbox4_armv8_ce:
+ /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
+ * Cryptology — CT-RSA 2015" for details.
+ */
+ CFI_STARTPROC();
+ movi v0.16b, #0x52
+ movi v1.16b, #0
+ mov v0.S[0], w0
+ aese v0.16b, v1.16b
+ addv s0, v0.4s
+ mov w0, v0.S[0]
+ CLEAR_REG(v0)
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce;)
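_gcry_aes_sbox4_armv8_ce applies the AES S-box to each byte of a 32-bit word (SubWord), which is all the key expansion in rijndael-armv8-ce.c needs. The trick: aese with an all-zero round key reduces to SubBytes followed by ShiftRows, the lanes that do not hold the input are pre-filled with 0x52 because the S-box maps 0x52 to 0x00, and the addv fold re-collects the four substituted bytes that ShiftRows scattered across the 32-bit lanes. A slow, table-free C reference for the same result (sketch only):

#include <stdint.h>

static uint8_t
gf256_mul (uint8_t a, uint8_t b)
{
  uint8_t r = 0;

  while (b)
    {
      if (b & 1)
        r ^= a;
      a = (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1b : 0)); /* reduce mod the AES polynomial */
      b >>= 1;
    }
  return r;
}

static uint8_t
aes_sbox (uint8_t x)
{
  uint8_t inv = 0, s;
  unsigned int i;

  /* Multiplicative inverse in GF(2^8) by brute force; inverse of 0 is 0. */
  for (i = 1; i < 256; i++)
    if (gf256_mul (x, (uint8_t)i) == 1)
      {
        inv = (uint8_t)i;
        break;
      }

  /* Affine transformation: XOR of inv rotated left by 0..4 bits, plus 0x63. */
  s = inv;
  for (i = 1; i <= 4; i++)
    s ^= (uint8_t)((inv << i) | (inv >> (8 - i)));
  return s ^ 0x63;
}

/* Reference for _gcry_aes_sbox4_armv8_ce: byte-wise S-box on a 32-bit word. */
static uint32_t
aes_sbox4_ref (uint32_t in4b)
{
  return (uint32_t)aes_sbox ((uint8_t)(in4b & 0xff))
         | ((uint32_t)aes_sbox ((uint8_t)((in4b >> 8) & 0xff)) << 8)
         | ((uint32_t)aes_sbox ((uint8_t)((in4b >> 16) & 0xff)) << 16)
         | ((uint32_t)aes_sbox ((uint8_t)((in4b >> 24) & 0xff)) << 24);
}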
+
+
+/*
+ * void _gcry_aes_invmixcol_armv8_ce(void *dst, const void *src);
+ */
+.align 3
+.globl _gcry_aes_invmixcol_armv8_ce
+ELF(.type _gcry_aes_invmixcol_armv8_ce,%function;)
+_gcry_aes_invmixcol_armv8_ce:
+ CFI_STARTPROC();
+ ld1 {v0.16b}, [x1]
+ aesimc v0.16b, v0.16b
+ st1 {v0.16b}, [x0]
+ CLEAR_REG(v0)
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce;)
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-armv8-ce.c b/comm/third_party/libgcrypt/cipher/rijndael-armv8-ce.c
new file mode 100644
index 0000000000..6e46830ee4
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-armv8-ce.c
@@ -0,0 +1,414 @@
+/* ARMv8 Crypto Extension AES for Libgcrypt
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for memcmp() */
+
+#include "types.h" /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "rijndael-internal.h"
+#include "./cipher-internal.h"
+
+
+#ifdef USE_ARM_CE
+
+
+typedef struct u128_s { u32 a, b, c, d; } u128_t;
+
+extern u32 _gcry_aes_sbox4_armv8_ce(u32 in4b);
+extern void _gcry_aes_invmixcol_armv8_ce(u128_t *dst, const u128_t *src);
+
+extern unsigned int _gcry_aes_enc_armv8_ce(const void *keysched, byte *dst,
+ const byte *src,
+ unsigned int nrounds);
+extern unsigned int _gcry_aes_dec_armv8_ce(const void *keysched, byte *dst,
+ const byte *src,
+ unsigned int nrounds);
+
+extern void _gcry_aes_cbc_enc_armv8_ce (const void *keysched,
+ unsigned char *outbuf,
+ const unsigned char *inbuf,
+ unsigned char *iv, size_t nblocks,
+ int cbc_mac, unsigned int nrounds);
+extern void _gcry_aes_cbc_dec_armv8_ce (const void *keysched,
+ unsigned char *outbuf,
+ const unsigned char *inbuf,
+ unsigned char *iv, size_t nblocks,
+ unsigned int nrounds);
+
+extern void _gcry_aes_cfb_enc_armv8_ce (const void *keysched,
+ unsigned char *outbuf,
+ const unsigned char *inbuf,
+ unsigned char *iv, size_t nblocks,
+ unsigned int nrounds);
+extern void _gcry_aes_cfb_dec_armv8_ce (const void *keysched,
+ unsigned char *outbuf,
+ const unsigned char *inbuf,
+ unsigned char *iv, size_t nblocks,
+ unsigned int nrounds);
+
+extern void _gcry_aes_ctr_enc_armv8_ce (const void *keysched,
+ unsigned char *outbuf,
+ const unsigned char *inbuf,
+ unsigned char *iv, size_t nblocks,
+ unsigned int nrounds);
+
+extern void _gcry_aes_ocb_enc_armv8_ce (const void *keysched,
+ unsigned char *outbuf,
+ const unsigned char *inbuf,
+ unsigned char *offset,
+ unsigned char *checksum,
+ unsigned char *L_table,
+ size_t nblocks,
+ unsigned int nrounds,
+ unsigned int blkn);
+extern void _gcry_aes_ocb_dec_armv8_ce (const void *keysched,
+ unsigned char *outbuf,
+ const unsigned char *inbuf,
+ unsigned char *offset,
+ unsigned char *checksum,
+ unsigned char *L_table,
+ size_t nblocks,
+ unsigned int nrounds,
+ unsigned int blkn);
+extern void _gcry_aes_ocb_auth_armv8_ce (const void *keysched,
+ const unsigned char *abuf,
+ unsigned char *offset,
+ unsigned char *checksum,
+ unsigned char *L_table,
+ size_t nblocks,
+ unsigned int nrounds,
+ unsigned int blkn);
+extern void _gcry_aes_xts_enc_armv8_ce (const void *keysched,
+ unsigned char *outbuf,
+ const unsigned char *inbuf,
+ unsigned char *tweak,
+ size_t nblocks, unsigned int nrounds);
+extern void _gcry_aes_xts_dec_armv8_ce (const void *keysched,
+ unsigned char *outbuf,
+ const unsigned char *inbuf,
+ unsigned char *tweak,
+ size_t nblocks, unsigned int nrounds);
+
+typedef void (*ocb_crypt_fn_t) (const void *keysched, unsigned char *outbuf,
+ const unsigned char *inbuf,
+ unsigned char *offset, unsigned char *checksum,
+ unsigned char *L_table, size_t nblocks,
+ unsigned int nrounds, unsigned int blkn);
+
+typedef void (*xts_crypt_fn_t) (const void *keysched, unsigned char *outbuf,
+ const unsigned char *inbuf,
+ unsigned char *tweak, size_t nblocks,
+ unsigned int nrounds);
+
+void
+_gcry_aes_armv8_ce_setkey (RIJNDAEL_context *ctx, const byte *key)
+{
+ union
+ {
+ PROPERLY_ALIGNED_TYPE dummy;
+ byte data[MAXKC][4];
+ u32 data32[MAXKC];
+ } tkk[2];
+ unsigned int rounds = ctx->rounds;
+ int KC = rounds - 6;
+ unsigned int keylen = KC * 4;
+ unsigned int i, r, t;
+ byte rcon = 1;
+ int j;
+#define k tkk[0].data
+#define k_u32 tkk[0].data32
+#define tk tkk[1].data
+#define tk_u32 tkk[1].data32
+#define W (ctx->keyschenc)
+#define W_u32 (ctx->keyschenc32)
+
+ for (i = 0; i < keylen; i++)
+ {
+ k[i >> 2][i & 3] = key[i];
+ }
+
+ for (j = KC-1; j >= 0; j--)
+ {
+ tk_u32[j] = k_u32[j];
+ }
+ r = 0;
+ t = 0;
+ /* Copy values into round key array. */
+ for (j = 0; (j < KC) && (r < rounds + 1); )
+ {
+ for (; (j < KC) && (t < 4); j++, t++)
+ {
+ W_u32[r][t] = le_bswap32(tk_u32[j]);
+ }
+ if (t == 4)
+ {
+ r++;
+ t = 0;
+ }
+ }
+
+ while (r < rounds + 1)
+ {
+ tk_u32[0] ^= _gcry_aes_sbox4_armv8_ce(rol(tk_u32[KC - 1], 24)) ^ rcon;
+
+ if (KC != 8)
+ {
+ for (j = 1; j < KC; j++)
+ {
+ tk_u32[j] ^= tk_u32[j-1];
+ }
+ }
+ else
+ {
+ for (j = 1; j < KC/2; j++)
+ {
+ tk_u32[j] ^= tk_u32[j-1];
+ }
+
+ tk_u32[KC/2] ^= _gcry_aes_sbox4_armv8_ce(tk_u32[KC/2 - 1]);
+
+ for (j = KC/2 + 1; j < KC; j++)
+ {
+ tk_u32[j] ^= tk_u32[j-1];
+ }
+ }
+
+ /* Copy values into round key array. */
+ for (j = 0; (j < KC) && (r < rounds + 1); )
+ {
+ for (; (j < KC) && (t < 4); j++, t++)
+ {
+ W_u32[r][t] = le_bswap32(tk_u32[j]);
+ }
+ if (t == 4)
+ {
+ r++;
+ t = 0;
+ }
+ }
+
+ rcon = (rcon << 1) ^ ((rcon >> 7) * 0x1b);
+ }
+
+#undef W
+#undef tk
+#undef k
+#undef W_u32
+#undef tk_u32
+#undef k_u32
+ wipememory(&tkk, sizeof(tkk));
+}
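The closing statement of the expansion loop, rcon = (rcon << 1) ^ ((rcon >> 7) * 0x1b), is doubling in GF(2^8) with the AES reduction polynomial, so rcon steps through the round constants 01, 02, 04, ..., 80, 1b, 36. A standalone check of that claim (sketch):

#include <stdio.h>

int
main (void)
{
  unsigned char rcon = 1;
  int i;

  /* AES-128 consumes ten round constants: expect 01 02 04 08 10 20 40 80 1b 36 */
  for (i = 0; i < 10; i++)
    {
      printf ("%02x ", rcon);
      rcon = (unsigned char)((rcon << 1) ^ ((rcon >> 7) * 0x1b));
    }
  printf ("\n");
  return 0;
}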
+
+/* Make a decryption key from an encryption key. */
+void
+_gcry_aes_armv8_ce_prepare_decryption (RIJNDAEL_context *ctx)
+{
+ u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
+ u128_t *dkey = (u128_t *)(void *)ctx->keyschdec;
+ int rounds = ctx->rounds;
+ int rr;
+ int r;
+
+#define DO_AESIMC() _gcry_aes_invmixcol_armv8_ce(&dkey[r], &ekey[rr])
+
+ dkey[0] = ekey[rounds];
+ r = 1;
+ rr = rounds-1;
+ DO_AESIMC(); r++; rr--; /* round 1 */
+ DO_AESIMC(); r++; rr--; /* round 2 */
+ DO_AESIMC(); r++; rr--; /* round 3 */
+ DO_AESIMC(); r++; rr--; /* round 4 */
+ DO_AESIMC(); r++; rr--; /* round 5 */
+ DO_AESIMC(); r++; rr--; /* round 6 */
+ DO_AESIMC(); r++; rr--; /* round 7 */
+ DO_AESIMC(); r++; rr--; /* round 8 */
+ DO_AESIMC(); r++; rr--; /* round 9 */
+ if (rounds >= 12)
+ {
+ if (rounds > 12)
+ {
+ DO_AESIMC(); r++; rr--; /* round 10 */
+ DO_AESIMC(); r++; rr--; /* round 11 */
+ }
+
+ DO_AESIMC(); r++; rr--; /* round 12 / 10 */
+ DO_AESIMC(); r++; rr--; /* round 13 / 11 */
+ }
+
+ dkey[r] = ekey[0];
+
+#undef DO_AESIMC
+}
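For rounds = 10, 12 or 14 the unrolled DO_AESIMC sequence above is equivalent to the loop below: the decryption schedule is the encryption schedule in reverse order, with InvMixColumns applied to every round key except the first and the last. A sketch reusing the u128_t and _gcry_aes_invmixcol_armv8_ce declarations from this file (not the shipped code):

static void
prepare_decryption_sketch (u128_t *dkey, const u128_t *ekey, int rounds)
{
  int r;

  dkey[0] = ekey[rounds];
  for (r = 1; r < rounds; r++)
    _gcry_aes_invmixcol_armv8_ce (&dkey[r], &ekey[rounds - r]);
  dkey[rounds] = ekey[0];
}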
+
+unsigned int
+_gcry_aes_armv8_ce_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
+ const unsigned char *src)
+{
+ const void *keysched = ctx->keyschenc32;
+ unsigned int nrounds = ctx->rounds;
+
+ return _gcry_aes_enc_armv8_ce(keysched, dst, src, nrounds);
+}
+
+unsigned int
+_gcry_aes_armv8_ce_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
+ const unsigned char *src)
+{
+ const void *keysched = ctx->keyschdec32;
+ unsigned int nrounds = ctx->rounds;
+
+ return _gcry_aes_dec_armv8_ce(keysched, dst, src, nrounds);
+}
+
+void
+_gcry_aes_armv8_ce_cbc_enc (const RIJNDAEL_context *ctx, unsigned char *iv,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks, int cbc_mac)
+{
+ const void *keysched = ctx->keyschenc32;
+ unsigned int nrounds = ctx->rounds;
+
+ _gcry_aes_cbc_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, cbc_mac,
+ nrounds);
+}
+
+void
+_gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ const void *keysched = ctx->keyschdec32;
+ unsigned int nrounds = ctx->rounds;
+
+ if ( !ctx->decryption_prepared )
+ {
+ _gcry_aes_armv8_ce_prepare_decryption ( ctx );
+ ctx->decryption_prepared = 1;
+ }
+
+ _gcry_aes_cbc_dec_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds);
+}
+
+void
+_gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ const void *keysched = ctx->keyschenc32;
+ unsigned int nrounds = ctx->rounds;
+
+ _gcry_aes_cfb_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds);
+}
+
+void
+_gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ const void *keysched = ctx->keyschenc32;
+ unsigned int nrounds = ctx->rounds;
+
+ _gcry_aes_cfb_dec_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds);
+}
+
+void
+_gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ const void *keysched = ctx->keyschenc32;
+ unsigned int nrounds = ctx->rounds;
+
+ _gcry_aes_ctr_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds);
+}
+
+size_t
+_gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32;
+ ocb_crypt_fn_t crypt_fn = encrypt ? _gcry_aes_ocb_enc_armv8_ce
+ : _gcry_aes_ocb_dec_armv8_ce;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned int nrounds = ctx->rounds;
+ u64 blkn = c->u_mode.ocb.data_nblocks;
+
+ if ( !encrypt && !ctx->decryption_prepared )
+ {
+ _gcry_aes_armv8_ce_prepare_decryption ( ctx );
+ ctx->decryption_prepared = 1;
+ }
+
+ c->u_mode.ocb.data_nblocks = blkn + nblocks;
+
+ crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
+ c->u_mode.ocb.L[0], nblocks, nrounds, (unsigned int)blkn);
+
+ return 0;
+}
+
+size_t
+_gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ const void *keysched = ctx->keyschenc32;
+ const unsigned char *abuf = abuf_arg;
+ unsigned int nrounds = ctx->rounds;
+ u64 blkn = c->u_mode.ocb.aad_nblocks;
+
+ c->u_mode.ocb.aad_nblocks = blkn + nblocks;
+
+ _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum, c->u_mode.ocb.L[0],
+ nblocks, nrounds, (unsigned int)blkn);
+
+ return 0;
+}
+
+void
+_gcry_aes_armv8_ce_xts_crypt (RIJNDAEL_context *ctx, unsigned char *tweak,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks, int encrypt)
+{
+ const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32;
+ xts_crypt_fn_t crypt_fn = encrypt ? _gcry_aes_xts_enc_armv8_ce
+ : _gcry_aes_xts_dec_armv8_ce;
+ unsigned int nrounds = ctx->rounds;
+
+ if ( !encrypt && !ctx->decryption_prepared )
+ {
+ _gcry_aes_armv8_ce_prepare_decryption ( ctx );
+ ctx->decryption_prepared = 1;
+ }
+
+ crypt_fn(keysched, outbuf, inbuf, tweak, nblocks, nrounds);
+}
+
+#endif /* USE_ARM_CE */
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-internal.h b/comm/third_party/libgcrypt/cipher/rijndael-internal.h
new file mode 100644
index 0000000000..7e01f6b057
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-internal.h
@@ -0,0 +1,194 @@
+/* Rijndael (AES) for GnuPG
+ * Copyright (C) 2000, 2001, 2002, 2003, 2007,
+ * 2008, 2011, 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef G10_RIJNDAEL_INTERNAL_H
+#define G10_RIJNDAEL_INTERNAL_H
+
+#include "types.h" /* for byte and u32 typedefs */
+
+
+#define MAXKC (256/32)
+#define MAXROUNDS 14
+#define BLOCKSIZE (128/8)
+
+
+/* Helper macro to force alignment to 16 or 64 bytes. */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_16 __attribute__ ((aligned (16)))
+# define ATTR_ALIGNED_64 __attribute__ ((aligned (64)))
+#else
+# define ATTR_ALIGNED_16
+# define ATTR_ALIGNED_64
+#endif
+
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+/* USE_SSSE3 indicates whether to use SSSE3 code. */
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSSE3 1
+#endif
+
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
+# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+# define USE_ARM_ASM 1
+# endif
+#endif
+#if defined(__AARCH64EL__)
+# ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+# define USE_ARM_ASM 1
+# endif
+#endif
+
+/* USE_PADLOCK indicates whether to compile the padlock specific
+ code. */
+#undef USE_PADLOCK
+#ifdef ENABLE_PADLOCK_SUPPORT
+# ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# if (defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)
+# define USE_PADLOCK 1
+# endif
+# endif
+#endif /* ENABLE_PADLOCK_SUPPORT */
+
+/* USE_AESNI indicates whether to compile with Intel AES-NI code. We
+ need the vector-size attribute which seems to be available since
+ gcc 3. However, to be on the safe side we require at least gcc 4. */
+#undef USE_AESNI
+#ifdef ENABLE_AESNI_SUPPORT
+# if ((defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+# if __GNUC__ >= 4
+# define USE_AESNI 1
+# endif
+# endif
+#endif /* ENABLE_AESNI_SUPPORT */
+
+/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
+ * code. */
+#undef USE_ARM_CE
+#ifdef ENABLE_ARM_CRYPTO_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+# define USE_ARM_CE 1
+# elif defined(__AARCH64EL__) \
+ && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+# define USE_ARM_CE 1
+# endif
+#endif /* ENABLE_ARM_CRYPTO_SUPPORT */
+
+/* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto
+ * accelerated code. USE_PPC_CRYPTO_WITH_PPC9LE indicates whether to
+ * enable POWER9 optimized variant. */
+#undef USE_PPC_CRYPTO
+#undef USE_PPC_CRYPTO_WITH_PPC9LE
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+# if __GNUC__ >= 4
+# define USE_PPC_CRYPTO 1
+# if !defined(WORDS_BIGENDIAN) && defined(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00)
+# define USE_PPC_CRYPTO_WITH_PPC9LE 1
+# endif
+# endif
+# endif
+#endif /* ENABLE_PPC_CRYPTO_SUPPORT */
+
+/* USE_S390X_CRYPTO indicates whether to enable zSeries code. */
+#undef USE_S390X_CRYPTO
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define USE_S390X_CRYPTO 1
+#endif /* USE_S390X_CRYPTO */
+
+struct RIJNDAEL_context_s;
+
+typedef unsigned int (*rijndael_cryptfn_t)(const struct RIJNDAEL_context_s *ctx,
+ unsigned char *bx,
+ const unsigned char *ax);
+typedef void (*rijndael_prefetchfn_t)(void);
+typedef void (*rijndael_prepare_decfn_t)(struct RIJNDAEL_context_s *ctx);
+
+/* Our context object. */
+typedef struct RIJNDAEL_context_s
+{
+ /* The first fields are the keyschedule arrays. This is so that
+ they are aligned on a 16 byte boundary if using gcc. This
+ alignment is required for the AES-NI code and a good idea in any
+ case. The alignment is guaranteed due to the way cipher.c
+ allocates the space for the context. The PROPERLY_ALIGNED_TYPE
+ hack is used to force a minimal alignment if not using gcc or if
+ the alignment requirement is higher than 16 bytes. */
+ union
+ {
+ PROPERLY_ALIGNED_TYPE dummy;
+ byte keyschedule[MAXROUNDS+1][4][4];
+ u32 keyschedule32[MAXROUNDS+1][4];
+#ifdef USE_PADLOCK
+ /* The key as passed to the padlock engine. It is only used if
+ the padlock engine is used (USE_PADLOCK, below). */
+ unsigned char padlock_key[16] __attribute__ ((aligned (16)));
+#endif /*USE_PADLOCK*/
+ } u1;
+ union
+ {
+ PROPERLY_ALIGNED_TYPE dummy;
+ byte keyschedule[MAXROUNDS+1][4][4];
+ u32 keyschedule32[MAXROUNDS+1][4];
+ } u2;
+ int rounds; /* Key-length-dependent number of rounds. */
+ unsigned int decryption_prepared:1; /* The decryption key schedule is available. */
+#ifdef USE_AESNI
+ unsigned int use_avx:1; /* AVX shall be used by AES-NI implementation. */
+ unsigned int use_avx2:1; /* AVX2 shall be used by AES-NI implementation. */
+#endif /*USE_AESNI*/
+#ifdef USE_S390X_CRYPTO
+ byte km_func;
+ byte km_func_xts;
+ byte kmc_func;
+ byte kmac_func;
+ byte kmf_func;
+ byte kmo_func;
+ byte kma_func;
+#endif /*USE_S390X_CRYPTO*/
+ rijndael_cryptfn_t encrypt_fn;
+ rijndael_cryptfn_t decrypt_fn;
+ rijndael_prefetchfn_t prefetch_enc_fn;
+ rijndael_prefetchfn_t prefetch_dec_fn;
+ rijndael_prepare_decfn_t prepare_decryption;
+} RIJNDAEL_context ATTR_ALIGNED_16;
+
+/* Macros defining alias for the keyschedules. */
+#define keyschenc u1.keyschedule
+#define keyschenc32 u1.keyschedule32
+#define keyschdec u2.keyschedule
+#define keyschdec32 u2.keyschedule32
+#define padlockkey u1.padlock_key
+
+#endif /* G10_RIJNDAEL_INTERNAL_H */
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-padlock.c b/comm/third_party/libgcrypt/cipher/rijndael-padlock.c
new file mode 100644
index 0000000000..3af214d74e
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-padlock.c
@@ -0,0 +1,110 @@
+/* Padlock accelerated AES for Libgcrypt
+ * Copyright (C) 2000, 2001, 2002, 2003, 2007,
+ * 2008, 2011, 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for memcmp() */
+
+#include "types.h" /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "rijndael-internal.h"
+
+#ifdef USE_PADLOCK
+
+/* Encrypt or decrypt one block using the padlock engine. A and B may
+ be the same. */
+static unsigned int
+do_padlock (const RIJNDAEL_context *ctx, unsigned char *bx,
+ const unsigned char *ax, int decrypt_flag)
+{
+ /* BX and AX are not necessarily correctly aligned. Thus we need to
+ copy them here. */
+ unsigned char a[16] __attribute__ ((aligned (16)));
+ unsigned char b[16] __attribute__ ((aligned (16)));
+ unsigned int cword[4] __attribute__ ((aligned (16)));
+ unsigned char *pa = a;
+ unsigned char *pb = b;
+ int blocks;
+
+ /* The control word fields are:
+ 127:12 11:10 9 8 7 6 5 4 3:0
+ RESERVED KSIZE CRYPT INTER KEYGN CIPHR ALIGN DGEST ROUND */
+ cword[0] = (ctx->rounds & 15); /* (The mask is just a safeguard.) */
+ cword[1] = 0;
+ cword[2] = 0;
+ cword[3] = 0;
+ if (decrypt_flag)
+ cword[0] |= 0x00000200;
+
+ memcpy (a, ax, 16);
+
+ blocks = 1; /* Init counter for just one block. */
+#ifdef __x86_64__
+ asm volatile
+ ("pushfq\n\t" /* Force key reload. */
+ "popfq\n\t"
+ ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
+ : "+S" (pa), "+D" (pb), "+c" (blocks)
+ : "d" (cword), "b" (ctx->padlockkey)
+ : "cc", "memory"
+ );
+#else
+ asm volatile
+ ("pushfl\n\t" /* Force key reload. */
+ "popfl\n\t"
+ "xchg %4, %%ebx\n\t" /* Load key. */
+ ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
+ "xchg %4, %%ebx\n" /* Restore GOT register. */
+ : "+S" (pa), "+D" (pb), "+c" (blocks)
+ : "d" (cword), "r" (ctx->padlockkey)
+ : "cc", "memory"
+ );
+#endif
+
+ memcpy (bx, b, 16);
+
+ return (48 + 15 /* possible padding for alignment */);
+}
+
+unsigned int
+_gcry_aes_padlock_encrypt (const RIJNDAEL_context *ctx,
+ unsigned char *bx, const unsigned char *ax)
+{
+ return do_padlock(ctx, bx, ax, 0);
+}
+
+unsigned int
+_gcry_aes_padlock_decrypt (const RIJNDAEL_context *ctx,
+ unsigned char *bx, const unsigned char *ax)
+{
+ return do_padlock(ctx, bx, ax, 1);
+}
+
+void
+_gcry_aes_padlock_prepare_decryption (RIJNDAEL_context *ctx)
+{
+ /* Padlock does not need decryption subkeys. */
+ (void)ctx;
+}
+#endif /* USE_PADLOCK */
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-ppc-common.h b/comm/third_party/libgcrypt/cipher/rijndael-ppc-common.h
new file mode 100644
index 0000000000..bbbeaac035
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-ppc-common.h
@@ -0,0 +1,342 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+#ifndef G10_RIJNDAEL_PPC_COMMON_H
+#define G10_RIJNDAEL_PPC_COMMON_H
+
+#include <altivec.h>
+
+
+typedef vector unsigned char block;
+
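+/* Editor's note: the packed/aligned(1) attributes let 16-byte blocks be
+ * accessed through possibly unaligned caller buffers, and may_alias avoids
+ * strict-aliasing problems when byte pointers are cast to u128_t. */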
+typedef union
+{
+ u32 data32[4];
+} __attribute__((packed, aligned(1), may_alias)) u128_t;
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+#define ALIGNED_LOAD(in_ptr, offs) \
+ (asm_aligned_ld ((offs) * 16, (const void *)(in_ptr)))
+
+#define ALIGNED_STORE(out_ptr, offs, vec) \
+ (asm_aligned_st ((vec), (offs) * 16, (void *)(out_ptr)))
+
+#define VEC_BE_SWAP(vec, bige_const) (asm_be_swap ((vec), (bige_const)))
+
+#define VEC_LOAD_BE(in_ptr, offs, bige_const) \
+ (asm_be_swap (asm_load_be_noswap ((offs) * 16, (const void *)(in_ptr)), \
+ bige_const))
+
+#define VEC_LOAD_BE_NOSWAP(in_ptr, offs) \
+ (asm_load_be_noswap ((offs) * 16, (const unsigned char *)(in_ptr)))
+
+#define VEC_STORE_BE(out_ptr, offs, vec, bige_const) \
+ (asm_store_be_noswap (asm_be_swap ((vec), (bige_const)), (offs) * 16, \
+ (void *)(out_ptr)))
+
+#define VEC_STORE_BE_NOSWAP(out_ptr, offs, vec) \
+ (asm_store_be_noswap ((vec), (offs) * 16, (void *)(out_ptr)))
+
+
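+/* Only the first and last round keys are kept in variables here; the middle
+ * round keys are reloaded with ALIGNED_LOAD inside AES_ENCRYPT/AES_DECRYPT.
+ * The *_ALL variants further below preload every round key instead. */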
+#define ROUND_KEY_VARIABLES \
+ block rkey0, rkeylast
+
+#define PRELOAD_ROUND_KEYS(nrounds) \
+ do { \
+ rkey0 = ALIGNED_LOAD (rk, 0); \
+ rkeylast = ALIGNED_LOAD (rk, nrounds); \
+ } while (0)
+
+#define AES_ENCRYPT(blk, nrounds) \
+ do { \
+ blk ^= rkey0; \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 1)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 2)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 3)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 4)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 5)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 6)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 7)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 8)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 9)); \
+ if (nrounds >= 12) \
+ { \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 10)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 11)); \
+ if (nrounds > 12) \
+ { \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 12)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 13)); \
+ } \
+ } \
+ blk = asm_cipherlast_be (blk, rkeylast); \
+ } while (0)
+
+#define AES_DECRYPT(blk, nrounds) \
+ do { \
+ blk ^= rkey0; \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 1)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 2)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 3)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 4)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 5)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 6)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 7)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 8)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 9)); \
+ if (nrounds >= 12) \
+ { \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 10)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 11)); \
+ if (nrounds > 12) \
+ { \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 12)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 13)); \
+ } \
+ } \
+ blk = asm_ncipherlast_be (blk, rkeylast); \
+ } while (0)
+
+
+#define ROUND_KEY_VARIABLES_ALL \
+ block rkey0, rkey1, rkey2, rkey3, rkey4, rkey5, rkey6, rkey7, rkey8, \
+ rkey9, rkey10, rkey11, rkey12, rkey13, rkeylast
+
+#define PRELOAD_ROUND_KEYS_ALL(nrounds) \
+ do { \
+ rkey0 = ALIGNED_LOAD (rk, 0); \
+ rkey1 = ALIGNED_LOAD (rk, 1); \
+ rkey2 = ALIGNED_LOAD (rk, 2); \
+ rkey3 = ALIGNED_LOAD (rk, 3); \
+ rkey4 = ALIGNED_LOAD (rk, 4); \
+ rkey5 = ALIGNED_LOAD (rk, 5); \
+ rkey6 = ALIGNED_LOAD (rk, 6); \
+ rkey7 = ALIGNED_LOAD (rk, 7); \
+ rkey8 = ALIGNED_LOAD (rk, 8); \
+ rkey9 = ALIGNED_LOAD (rk, 9); \
+ if (nrounds >= 12) \
+ { \
+ rkey10 = ALIGNED_LOAD (rk, 10); \
+ rkey11 = ALIGNED_LOAD (rk, 11); \
+ if (nrounds > 12) \
+ { \
+ rkey12 = ALIGNED_LOAD (rk, 12); \
+ rkey13 = ALIGNED_LOAD (rk, 13); \
+ } \
+ } \
+ rkeylast = ALIGNED_LOAD (rk, nrounds); \
+ } while (0)
+
+#define AES_ENCRYPT_ALL(blk, nrounds) \
+ do { \
+ blk ^= rkey0; \
+ blk = asm_cipher_be (blk, rkey1); \
+ blk = asm_cipher_be (blk, rkey2); \
+ blk = asm_cipher_be (blk, rkey3); \
+ blk = asm_cipher_be (blk, rkey4); \
+ blk = asm_cipher_be (blk, rkey5); \
+ blk = asm_cipher_be (blk, rkey6); \
+ blk = asm_cipher_be (blk, rkey7); \
+ blk = asm_cipher_be (blk, rkey8); \
+ blk = asm_cipher_be (blk, rkey9); \
+ if (nrounds >= 12) \
+ { \
+ blk = asm_cipher_be (blk, rkey10); \
+ blk = asm_cipher_be (blk, rkey11); \
+ if (nrounds > 12) \
+ { \
+ blk = asm_cipher_be (blk, rkey12); \
+ blk = asm_cipher_be (blk, rkey13); \
+ } \
+ } \
+ blk = asm_cipherlast_be (blk, rkeylast); \
+ } while (0)
+
+
+static ASM_FUNC_ATTR_INLINE block
+asm_aligned_ld(unsigned long offset, const void *ptr)
+{
+ block vec;
+#if __GNUC__ >= 4
+ if (__builtin_constant_p (offset) && offset == 0)
+ __asm__ volatile ("lvx %0,0,%1\n\t"
+ : "=v" (vec)
+ : "r" ((uintptr_t)ptr)
+ : "memory");
+ else
+#endif
+ __asm__ volatile ("lvx %0,%1,%2\n\t"
+ : "=v" (vec)
+ : "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+ return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE void
+asm_aligned_st(block vec, unsigned long offset, void *ptr)
+{
+#if __GNUC__ >= 4
+ if (__builtin_constant_p (offset) && offset == 0)
+ __asm__ volatile ("stvx %0,0,%1\n\t"
+ :
+ : "v" (vec), "r" ((uintptr_t)ptr)
+ : "memory");
+ else
+#endif
+ __asm__ volatile ("stvx %0,%1,%2\n\t"
+ :
+ : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_vperm1(block vec, block mask)
+{
+ block o;
+ __asm__ volatile ("vperm %0,%1,%1,%2\n\t"
+ : "=v" (o)
+ : "v" (vec), "v" (mask));
+ return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_add_uint128(block a, block b)
+{
+ block res;
+ __asm__ volatile ("vadduqm %0,%1,%2\n\t"
+ : "=v" (res)
+ : "v" (a), "v" (b));
+ return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_add_uint64(block a, block b)
+{
+ block res;
+ __asm__ volatile ("vaddudm %0,%1,%2\n\t"
+ : "=v" (res)
+ : "v" (a), "v" (b));
+ return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_sra_int64(block a, block b)
+{
+ block res;
+ __asm__ volatile ("vsrad %0,%1,%2\n\t"
+ : "=v" (res)
+ : "v" (a), "v" (b));
+ return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_swap_uint64_halfs(block a)
+{
+ block res;
+ __asm__ volatile ("xxswapd %x0, %x1"
+ : "=wa" (res)
+ : "wa" (a));
+ return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_xor(block a, block b)
+{
+ block res;
+ __asm__ volatile ("vxor %0,%1,%2\n\t"
+ : "=v" (res)
+ : "v" (a), "v" (b));
+ return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_cipher_be(block b, block rk)
+{
+ block o;
+ __asm__ volatile ("vcipher %0, %1, %2\n\t"
+ : "=v" (o)
+ : "v" (b), "v" (rk));
+ return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_cipherlast_be(block b, block rk)
+{
+ block o;
+ __asm__ volatile ("vcipherlast %0, %1, %2\n\t"
+ : "=v" (o)
+ : "v" (b), "v" (rk));
+ return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_ncipher_be(block b, block rk)
+{
+ block o;
+ __asm__ volatile ("vncipher %0, %1, %2\n\t"
+ : "=v" (o)
+ : "v" (b), "v" (rk));
+ return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_ncipherlast_be(block b, block rk)
+{
+ block o;
+ __asm__ volatile ("vncipherlast %0, %1, %2\n\t"
+ : "=v" (o)
+ : "v" (b), "v" (rk));
+ return o;
+}
+
+
+/* Make a decryption key from an encryption key. */
+static ASM_FUNC_ATTR_INLINE void
+internal_aes_ppc_prepare_decryption (RIJNDAEL_context *ctx)
+{
+ u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
+ u128_t *dkey = (u128_t *)(void *)ctx->keyschdec;
+ int rounds = ctx->rounds;
+ int rr;
+ int r;
+
+ for (r = 0, rr = rounds; r <= rounds; r++, rr--)
+ {
+ ALIGNED_STORE (dkey, r, ALIGNED_LOAD (ekey, rr));
+ }
+}
+
+#endif /* G10_RIJNDAEL_PPC_COMMON_H */
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-ppc-functions.h b/comm/third_party/libgcrypt/cipher/rijndael-ppc-functions.h
new file mode 100644
index 0000000000..72f31852b4
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-ppc-functions.h
@@ -0,0 +1,2020 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+unsigned int ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
+ unsigned char *out,
+ const unsigned char *in)
+{
+ const block bige_const = asm_load_be_const();
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block b;
+
+ b = VEC_LOAD_BE (in, 0, bige_const);
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ AES_ENCRYPT (b, rounds);
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ return 0; /* does not use stack */
+}
+
+
+unsigned int DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
+ unsigned char *out,
+ const unsigned char *in)
+{
+ const block bige_const = asm_load_be_const();
+ const u128_t *rk = (u128_t *)&ctx->keyschdec;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block b;
+
+ b = VEC_LOAD_BE (in, 0, bige_const);
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ AES_DECRYPT (b, rounds);
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ return 0; /* does not use stack */
+}
+
+
+void CFB_ENC_FUNC (void *context, unsigned char *iv_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES_ALL;
+ block rkeylast_orig;
+ block iv;
+
+ iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+ PRELOAD_ROUND_KEYS_ALL (rounds);
+ rkeylast_orig = rkeylast;
+
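+ /* The plaintext block is folded into the last round key, so the final
+ * vcipherlast both finishes encrypting the IV and XORs in the plaintext,
+ * yielding the ciphertext (and next IV) in a single step. */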
+ for (; nblocks >= 2; nblocks -= 2)
+ {
+ block in2, iv1;
+
+ rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const);
+ in2 = VEC_LOAD_BE (in + 1, 0, bige_const);
+ in += 2;
+
+ AES_ENCRYPT_ALL (iv, rounds);
+
+ iv1 = iv;
+ rkeylast = rkeylast_orig ^ in2;
+
+ AES_ENCRYPT_ALL (iv, rounds);
+
+ VEC_STORE_BE (out++, 0, iv1, bige_const);
+ VEC_STORE_BE (out++, 0, iv, bige_const);
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in++, 0, bige_const);
+
+ AES_ENCRYPT_ALL (iv, rounds);
+
+ VEC_STORE_BE (out++, 0, iv, bige_const);
+ }
+
+ VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+}
+
+void CFB_DEC_FUNC (void *context, unsigned char *iv_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block rkeylast_orig;
+ block iv, b, bin;
+ block in0, in1, in2, in3, in4, in5, in6, in7;
+ block b0, b1, b2, b3, b4, b5, b6, b7;
+ block rkey;
+
+ iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+ PRELOAD_ROUND_KEYS (rounds);
+ rkeylast_orig = rkeylast;
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ in0 = iv;
+ in1 = VEC_LOAD_BE_NOSWAP (in, 0);
+ in2 = VEC_LOAD_BE_NOSWAP (in, 1);
+ in3 = VEC_LOAD_BE_NOSWAP (in, 2);
+ in4 = VEC_LOAD_BE_NOSWAP (in, 3);
+ in1 = VEC_BE_SWAP (in1, bige_const);
+ in2 = VEC_BE_SWAP (in2, bige_const);
+ in5 = VEC_LOAD_BE_NOSWAP (in, 4);
+ in6 = VEC_LOAD_BE_NOSWAP (in, 5);
+ in3 = VEC_BE_SWAP (in3, bige_const);
+ in4 = VEC_BE_SWAP (in4, bige_const);
+ in7 = VEC_LOAD_BE_NOSWAP (in, 6);
+ iv = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+ in5 = VEC_BE_SWAP (in5, bige_const);
+ in6 = VEC_BE_SWAP (in6, bige_const);
+ b0 = asm_xor (rkey0, in0);
+ b1 = asm_xor (rkey0, in1);
+ in7 = VEC_BE_SWAP (in7, bige_const);
+ iv = VEC_BE_SWAP (iv, bige_const);
+ b2 = asm_xor (rkey0, in2);
+ b3 = asm_xor (rkey0, in3);
+ b4 = asm_xor (rkey0, in4);
+ b5 = asm_xor (rkey0, in5);
+ b6 = asm_xor (rkey0, in6);
+ b7 = asm_xor (rkey0, in7);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey); \
+ b4 = asm_cipher_be (b4, rkey); \
+ b5 = asm_cipher_be (b5, rkey); \
+ b6 = asm_cipher_be (b6, rkey); \
+ b7 = asm_cipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ in1 = asm_xor (rkeylast, in1);
+ in2 = asm_xor (rkeylast, in2);
+ in3 = asm_xor (rkeylast, in3);
+ in4 = asm_xor (rkeylast, in4);
+ b0 = asm_cipherlast_be (b0, in1);
+ b1 = asm_cipherlast_be (b1, in2);
+ in5 = asm_xor (rkeylast, in5);
+ in6 = asm_xor (rkeylast, in6);
+ b2 = asm_cipherlast_be (b2, in3);
+ b3 = asm_cipherlast_be (b3, in4);
+ in7 = asm_xor (rkeylast, in7);
+ in0 = asm_xor (rkeylast, iv);
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b4 = asm_cipherlast_be (b4, in5);
+ b5 = asm_cipherlast_be (b5, in6);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b6 = asm_cipherlast_be (b6, in7);
+ b7 = asm_cipherlast_be (b7, in0);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ in0 = iv;
+ in1 = VEC_LOAD_BE (in, 0, bige_const);
+ in2 = VEC_LOAD_BE (in, 1, bige_const);
+ in3 = VEC_LOAD_BE (in, 2, bige_const);
+ iv = VEC_LOAD_BE (in, 3, bige_const);
+
+ b0 = asm_xor (rkey0, in0);
+ b1 = asm_xor (rkey0, in1);
+ b2 = asm_xor (rkey0, in2);
+ b3 = asm_xor (rkey0, in3);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ in1 = asm_xor (rkeylast, in1);
+ in2 = asm_xor (rkeylast, in2);
+ in3 = asm_xor (rkeylast, in3);
+ in0 = asm_xor (rkeylast, iv);
+ b0 = asm_cipherlast_be (b0, in1);
+ b1 = asm_cipherlast_be (b1, in2);
+ b2 = asm_cipherlast_be (b2, in3);
+ b3 = asm_cipherlast_be (b3, in0);
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ bin = VEC_LOAD_BE (in, 0, bige_const);
+ rkeylast = rkeylast_orig ^ bin;
+ b = iv;
+ iv = bin;
+
+ AES_ENCRYPT (b, rounds);
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ out++;
+ in++;
+ }
+
+ VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+}
+
+
+void CBC_ENC_FUNC (void *context, unsigned char *iv_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int cbc_mac)
+{
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ byte *out = (byte *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES_ALL;
+ block lastiv, b;
+ unsigned int outadd = -(!cbc_mac) & 16;
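+ /* outadd is 16 in normal CBC mode and 0 for CBC-MAC, where the output
+ * pointer is not advanced so only the final MAC block is written. */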
+
+ lastiv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+ PRELOAD_ROUND_KEYS_ALL (rounds);
+
+ for (; nblocks >= 2; nblocks -= 2)
+ {
+ block in2, lastiv1;
+
+ b = lastiv ^ VEC_LOAD_BE (in, 0, bige_const);
+ in2 = VEC_LOAD_BE (in + 1, 0, bige_const);
+ in += 2;
+
+ AES_ENCRYPT_ALL (b, rounds);
+
+ lastiv1 = b;
+ b = lastiv1 ^ in2;
+
+ AES_ENCRYPT_ALL (b, rounds);
+
+ lastiv = b;
+ VEC_STORE_BE ((u128_t *)out, 0, lastiv1, bige_const);
+ out += outadd;
+ VEC_STORE_BE ((u128_t *)out, 0, lastiv, bige_const);
+ out += outadd;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ b = lastiv ^ VEC_LOAD_BE (in++, 0, bige_const);
+
+ AES_ENCRYPT_ALL (b, rounds);
+
+ lastiv = b;
+ VEC_STORE_BE ((u128_t *)out, 0, b, bige_const);
+ out += outadd;
+ }
+
+ VEC_STORE_BE (iv_arg, 0, lastiv, bige_const);
+}
+
+void CBC_DEC_FUNC (void *context, unsigned char *iv_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschdec;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block rkeylast_orig;
+ block in0, in1, in2, in3, in4, in5, in6, in7;
+ block b0, b1, b2, b3, b4, b5, b6, b7;
+ block rkey;
+ block iv, b;
+
+ if (!ctx->decryption_prepared)
+ {
+ internal_aes_ppc_prepare_decryption (ctx);
+ ctx->decryption_prepared = 1;
+ }
+
+ iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+ PRELOAD_ROUND_KEYS (rounds);
+ rkeylast_orig = rkeylast;
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ in0 = VEC_LOAD_BE_NOSWAP (in, 0);
+ in1 = VEC_LOAD_BE_NOSWAP (in, 1);
+ in2 = VEC_LOAD_BE_NOSWAP (in, 2);
+ in3 = VEC_LOAD_BE_NOSWAP (in, 3);
+ in0 = VEC_BE_SWAP (in0, bige_const);
+ in1 = VEC_BE_SWAP (in1, bige_const);
+ in4 = VEC_LOAD_BE_NOSWAP (in, 4);
+ in5 = VEC_LOAD_BE_NOSWAP (in, 5);
+ in2 = VEC_BE_SWAP (in2, bige_const);
+ in3 = VEC_BE_SWAP (in3, bige_const);
+ in6 = VEC_LOAD_BE_NOSWAP (in, 6);
+ in7 = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+ b0 = asm_xor (rkey0, in0);
+ b1 = asm_xor (rkey0, in1);
+ in4 = VEC_BE_SWAP (in4, bige_const);
+ in5 = VEC_BE_SWAP (in5, bige_const);
+ b2 = asm_xor (rkey0, in2);
+ b3 = asm_xor (rkey0, in3);
+ in6 = VEC_BE_SWAP (in6, bige_const);
+ in7 = VEC_BE_SWAP (in7, bige_const);
+ b4 = asm_xor (rkey0, in4);
+ b5 = asm_xor (rkey0, in5);
+ b6 = asm_xor (rkey0, in6);
+ b7 = asm_xor (rkey0, in7);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_ncipher_be (b0, rkey); \
+ b1 = asm_ncipher_be (b1, rkey); \
+ b2 = asm_ncipher_be (b2, rkey); \
+ b3 = asm_ncipher_be (b3, rkey); \
+ b4 = asm_ncipher_be (b4, rkey); \
+ b5 = asm_ncipher_be (b5, rkey); \
+ b6 = asm_ncipher_be (b6, rkey); \
+ b7 = asm_ncipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ iv = asm_xor (rkeylast, iv);
+ in0 = asm_xor (rkeylast, in0);
+ in1 = asm_xor (rkeylast, in1);
+ in2 = asm_xor (rkeylast, in2);
+ b0 = asm_ncipherlast_be (b0, iv);
+ iv = in7;
+ b1 = asm_ncipherlast_be (b1, in0);
+ in3 = asm_xor (rkeylast, in3);
+ in4 = asm_xor (rkeylast, in4);
+ b2 = asm_ncipherlast_be (b2, in1);
+ b3 = asm_ncipherlast_be (b3, in2);
+ in5 = asm_xor (rkeylast, in5);
+ in6 = asm_xor (rkeylast, in6);
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b4 = asm_ncipherlast_be (b4, in3);
+ b5 = asm_ncipherlast_be (b5, in4);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b6 = asm_ncipherlast_be (b6, in5);
+ b7 = asm_ncipherlast_be (b7, in6);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ in0 = VEC_LOAD_BE (in, 0, bige_const);
+ in1 = VEC_LOAD_BE (in, 1, bige_const);
+ in2 = VEC_LOAD_BE (in, 2, bige_const);
+ in3 = VEC_LOAD_BE (in, 3, bige_const);
+
+ b0 = asm_xor (rkey0, in0);
+ b1 = asm_xor (rkey0, in1);
+ b2 = asm_xor (rkey0, in2);
+ b3 = asm_xor (rkey0, in3);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_ncipher_be (b0, rkey); \
+ b1 = asm_ncipher_be (b1, rkey); \
+ b2 = asm_ncipher_be (b2, rkey); \
+ b3 = asm_ncipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ iv = asm_xor (rkeylast, iv);
+ in0 = asm_xor (rkeylast, in0);
+ in1 = asm_xor (rkeylast, in1);
+ in2 = asm_xor (rkeylast, in2);
+
+ b0 = asm_ncipherlast_be (b0, iv);
+ iv = in3;
+ b1 = asm_ncipherlast_be (b1, in0);
+ b2 = asm_ncipherlast_be (b2, in1);
+ b3 = asm_ncipherlast_be (b3, in2);
+
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ rkeylast = rkeylast_orig ^ iv;
+
+ iv = VEC_LOAD_BE (in, 0, bige_const);
+ b = iv;
+ AES_DECRYPT (b, rounds);
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in++;
+ out++;
+ }
+
+ VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+}
+
+
+void CTR_ENC_FUNC (void *context, unsigned char *ctr_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ static const unsigned char vec_one_const[16] =
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block rkeylast_orig;
+ block ctr, b, one;
+
+ ctr = VEC_LOAD_BE (ctr_arg, 0, bige_const);
+ one = VEC_LOAD_BE (&vec_one_const, 0, bige_const);
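+ /* asm_add_uint128 (vadduqm) increments the big-endian counter as a full
+ * 128-bit integer, so carries propagate across the whole block. */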
+
+ PRELOAD_ROUND_KEYS (rounds);
+ rkeylast_orig = rkeylast;
+
+ if (nblocks >= 4)
+ {
+ block in0, in1, in2, in3, in4, in5, in6, in7;
+ block b0, b1, b2, b3, b4, b5, b6, b7;
+ block two, three, four;
+ block rkey;
+
+ two = asm_add_uint128 (one, one);
+ three = asm_add_uint128 (two, one);
+ four = asm_add_uint128 (two, two);
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ b1 = asm_add_uint128 (ctr, one);
+ b2 = asm_add_uint128 (ctr, two);
+ b3 = asm_add_uint128 (ctr, three);
+ b4 = asm_add_uint128 (ctr, four);
+ b5 = asm_add_uint128 (b1, four);
+ b6 = asm_add_uint128 (b2, four);
+ b7 = asm_add_uint128 (b3, four);
+ b0 = asm_xor (rkey0, ctr);
+ rkey = ALIGNED_LOAD (rk, 1);
+ ctr = asm_add_uint128 (b4, four);
+ b1 = asm_xor (rkey0, b1);
+ b2 = asm_xor (rkey0, b2);
+ b3 = asm_xor (rkey0, b3);
+ b0 = asm_cipher_be (b0, rkey);
+ b1 = asm_cipher_be (b1, rkey);
+ b2 = asm_cipher_be (b2, rkey);
+ b3 = asm_cipher_be (b3, rkey);
+ b4 = asm_xor (rkey0, b4);
+ b5 = asm_xor (rkey0, b5);
+ b6 = asm_xor (rkey0, b6);
+ b7 = asm_xor (rkey0, b7);
+ b4 = asm_cipher_be (b4, rkey);
+ b5 = asm_cipher_be (b5, rkey);
+ b6 = asm_cipher_be (b6, rkey);
+ b7 = asm_cipher_be (b7, rkey);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey); \
+ b4 = asm_cipher_be (b4, rkey); \
+ b5 = asm_cipher_be (b5, rkey); \
+ b6 = asm_cipher_be (b6, rkey); \
+ b7 = asm_cipher_be (b7, rkey);
+
+ in0 = VEC_LOAD_BE_NOSWAP (in, 0);
+ DO_ROUND(2);
+ in1 = VEC_LOAD_BE_NOSWAP (in, 1);
+ DO_ROUND(3);
+ in2 = VEC_LOAD_BE_NOSWAP (in, 2);
+ DO_ROUND(4);
+ in3 = VEC_LOAD_BE_NOSWAP (in, 3);
+ DO_ROUND(5);
+ in4 = VEC_LOAD_BE_NOSWAP (in, 4);
+ DO_ROUND(6);
+ in5 = VEC_LOAD_BE_NOSWAP (in, 5);
+ DO_ROUND(7);
+ in6 = VEC_LOAD_BE_NOSWAP (in, 6);
+ DO_ROUND(8);
+ in7 = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+ DO_ROUND(9);
+
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ in0 = VEC_BE_SWAP (in0, bige_const);
+ in1 = VEC_BE_SWAP (in1, bige_const);
+ in2 = VEC_BE_SWAP (in2, bige_const);
+ in3 = VEC_BE_SWAP (in3, bige_const);
+ in4 = VEC_BE_SWAP (in4, bige_const);
+ in5 = VEC_BE_SWAP (in5, bige_const);
+ in6 = VEC_BE_SWAP (in6, bige_const);
+ in7 = VEC_BE_SWAP (in7, bige_const);
+
+ in0 = asm_xor (rkeylast, in0);
+ in1 = asm_xor (rkeylast, in1);
+ in2 = asm_xor (rkeylast, in2);
+ in3 = asm_xor (rkeylast, in3);
+ b0 = asm_cipherlast_be (b0, in0);
+ b1 = asm_cipherlast_be (b1, in1);
+ in4 = asm_xor (rkeylast, in4);
+ in5 = asm_xor (rkeylast, in5);
+ b2 = asm_cipherlast_be (b2, in2);
+ b3 = asm_cipherlast_be (b3, in3);
+ in6 = asm_xor (rkeylast, in6);
+ in7 = asm_xor (rkeylast, in7);
+ b4 = asm_cipherlast_be (b4, in4);
+ b5 = asm_cipherlast_be (b5, in5);
+ b6 = asm_cipherlast_be (b6, in6);
+ b7 = asm_cipherlast_be (b7, in7);
+
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ b1 = asm_add_uint128 (ctr, one);
+ b2 = asm_add_uint128 (ctr, two);
+ b3 = asm_add_uint128 (ctr, three);
+ b0 = asm_xor (rkey0, ctr);
+ ctr = asm_add_uint128 (ctr, four);
+ b1 = asm_xor (rkey0, b1);
+ b2 = asm_xor (rkey0, b2);
+ b3 = asm_xor (rkey0, b3);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+
+ in0 = VEC_LOAD_BE (in, 0, bige_const);
+ in1 = VEC_LOAD_BE (in, 1, bige_const);
+ in2 = VEC_LOAD_BE (in, 2, bige_const);
+ in3 = VEC_LOAD_BE (in, 3, bige_const);
+
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ in0 = asm_xor (rkeylast, in0);
+ in1 = asm_xor (rkeylast, in1);
+ in2 = asm_xor (rkeylast, in2);
+ in3 = asm_xor (rkeylast, in3);
+
+ b0 = asm_cipherlast_be (b0, in0);
+ b1 = asm_cipherlast_be (b1, in1);
+ b2 = asm_cipherlast_be (b2, in2);
+ b3 = asm_cipherlast_be (b3, in3);
+
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ b = ctr;
+ ctr = asm_add_uint128 (ctr, one);
+ rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const);
+
+ AES_ENCRYPT (b, rounds);
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ out++;
+ in++;
+ }
+
+ VEC_STORE_BE (ctr_arg, 0, ctr, bige_const);
+}
+
+
+size_t OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt)
+{
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ u64 data_nblocks = c->u_mode.ocb.data_nblocks;
+ block l0, l1, l2, l;
+ block b0, b1, b2, b3, b4, b5, b6, b7, b;
+ block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
+ block rkey, rkeylf;
+ block ctr, iv;
+ ROUND_KEY_VARIABLES;
+
+ iv = VEC_LOAD_BE (c->u_iv.iv, 0, bige_const);
+ ctr = VEC_LOAD_BE (c->u_ctr.ctr, 0, bige_const);
+
+ l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const);
+ l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const);
+ l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const);
+
+ if (encrypt)
+ {
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
+ {
+ l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+ b = VEC_LOAD_BE (in, 0, bige_const);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ iv ^= l;
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ctr ^= b;
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ b ^= iv;
+ AES_ENCRYPT (b, rounds);
+ b ^= iv;
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in += 1;
+ out += 1;
+ }
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+ b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+ b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+ b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+ b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+ b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+ b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+ b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+ l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0);
+ b0 = VEC_BE_SWAP(b0, bige_const);
+ b1 = VEC_BE_SWAP(b1, bige_const);
+ b2 = VEC_BE_SWAP(b2, bige_const);
+ b3 = VEC_BE_SWAP(b3, bige_const);
+ b4 = VEC_BE_SWAP(b4, bige_const);
+ b5 = VEC_BE_SWAP(b5, bige_const);
+ b6 = VEC_BE_SWAP(b6, bige_const);
+ b7 = VEC_BE_SWAP(b7, bige_const);
+ l = VEC_BE_SWAP(l, bige_const);
+
+ ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
+
+ iv ^= rkey0;
+
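+ /* Offsets for 8 consecutive blocks as the running XOR of L_{ntz(i)};
+ * with i divisible by 8 the ntz sequence for i+1..i+8 is 0,1,0,2,0,1,0,>=3,
+ * the last term being the value loaded via ocb_get_l above. rkey0 was
+ * pre-folded into iv so the first AddRoundKey is absorbed here. */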
+ iv0 = iv ^ l0;
+ iv1 = iv ^ l0 ^ l1;
+ iv2 = iv ^ l1;
+ iv3 = iv ^ l1 ^ l2;
+ iv4 = iv ^ l1 ^ l2 ^ l0;
+ iv5 = iv ^ l2 ^ l0;
+ iv6 = iv ^ l2;
+ iv7 = iv ^ l2 ^ l;
+
+ b0 ^= iv0;
+ b1 ^= iv1;
+ b2 ^= iv2;
+ b3 ^= iv3;
+ b4 ^= iv4;
+ b5 ^= iv5;
+ b6 ^= iv6;
+ b7 ^= iv7;
+ iv = iv7 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey); \
+ b4 = asm_cipher_be (b4, rkey); \
+ b5 = asm_cipher_be (b5, rkey); \
+ b6 = asm_cipher_be (b6, rkey); \
+ b7 = asm_cipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+
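+ /* rkeylf cancels the rkey0 folded into the offsets and adds the last
+ * round key, so the final vcipherlast applies both the last AddRoundKey
+ * and the OCB post-whitening XOR with Offset_i. */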
+ rkeylf = asm_xor (rkeylast, rkey0);
+
+ DO_ROUND(8);
+
+ iv0 = asm_xor (rkeylf, iv0);
+ iv1 = asm_xor (rkeylf, iv1);
+ iv2 = asm_xor (rkeylf, iv2);
+ iv3 = asm_xor (rkeylf, iv3);
+ iv4 = asm_xor (rkeylf, iv4);
+ iv5 = asm_xor (rkeylf, iv5);
+ iv6 = asm_xor (rkeylf, iv6);
+ iv7 = asm_xor (rkeylf, iv7);
+
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ b0 = asm_cipherlast_be (b0, iv0);
+ b1 = asm_cipherlast_be (b1, iv1);
+ b2 = asm_cipherlast_be (b2, iv2);
+ b3 = asm_cipherlast_be (b3, iv3);
+ b4 = asm_cipherlast_be (b4, iv4);
+ b5 = asm_cipherlast_be (b5, iv5);
+ b6 = asm_cipherlast_be (b6, iv6);
+ b7 = asm_cipherlast_be (b7, iv7);
+
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4 && (data_nblocks % 4) == 0)
+ {
+ b0 = VEC_LOAD_BE (in, 0, bige_const);
+ b1 = VEC_LOAD_BE (in, 1, bige_const);
+ b2 = VEC_LOAD_BE (in, 2, bige_const);
+ b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+ l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
+
+ ctr ^= b0 ^ b1 ^ b2 ^ b3;
+
+ iv ^= rkey0;
+
+ iv0 = iv ^ l0;
+ iv1 = iv ^ l0 ^ l1;
+ iv2 = iv ^ l1;
+ iv3 = iv ^ l1 ^ l;
+
+ b0 ^= iv0;
+ b1 ^= iv1;
+ b2 ^= iv2;
+ b3 ^= iv3;
+ iv = iv3 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast ^ rkey0;
+ b0 = asm_cipherlast_be (b0, rkey ^ iv0);
+ b1 = asm_cipherlast_be (b1, rkey ^ iv1);
+ b2 = asm_cipherlast_be (b2, rkey ^ iv2);
+ b3 = asm_cipherlast_be (b3, rkey ^ iv3);
+
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+ b = VEC_LOAD_BE (in, 0, bige_const);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ iv ^= l;
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ctr ^= b;
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ b ^= iv;
+ AES_ENCRYPT (b, rounds);
+ b ^= iv;
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in += 1;
+ out += 1;
+ }
+ }
+ else
+ {
+ const u128_t *rk = (u128_t *)&ctx->keyschdec;
+
+ if (!ctx->decryption_prepared)
+ {
+ internal_aes_ppc_prepare_decryption (ctx);
+ ctx->decryption_prepared = 1;
+ }
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
+ {
+ l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+ b = VEC_LOAD_BE (in, 0, bige_const);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ iv ^= l;
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+ b ^= iv;
+ AES_DECRYPT (b, rounds);
+ b ^= iv;
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ctr ^= b;
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in += 1;
+ out += 1;
+ }
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+ b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+ b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+ b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+ b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+ b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+ b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+ b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+ l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0);
+ b0 = VEC_BE_SWAP(b0, bige_const);
+ b1 = VEC_BE_SWAP(b1, bige_const);
+ b2 = VEC_BE_SWAP(b2, bige_const);
+ b3 = VEC_BE_SWAP(b3, bige_const);
+ b4 = VEC_BE_SWAP(b4, bige_const);
+ b5 = VEC_BE_SWAP(b5, bige_const);
+ b6 = VEC_BE_SWAP(b6, bige_const);
+ b7 = VEC_BE_SWAP(b7, bige_const);
+ l = VEC_BE_SWAP(l, bige_const);
+
+ iv ^= rkey0;
+
+ iv0 = iv ^ l0;
+ iv1 = iv ^ l0 ^ l1;
+ iv2 = iv ^ l1;
+ iv3 = iv ^ l1 ^ l2;
+ iv4 = iv ^ l1 ^ l2 ^ l0;
+ iv5 = iv ^ l2 ^ l0;
+ iv6 = iv ^ l2;
+ iv7 = iv ^ l2 ^ l;
+
+ b0 ^= iv0;
+ b1 ^= iv1;
+ b2 ^= iv2;
+ b3 ^= iv3;
+ b4 ^= iv4;
+ b5 ^= iv5;
+ b6 ^= iv6;
+ b7 ^= iv7;
+ iv = iv7 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_ncipher_be (b0, rkey); \
+ b1 = asm_ncipher_be (b1, rkey); \
+ b2 = asm_ncipher_be (b2, rkey); \
+ b3 = asm_ncipher_be (b3, rkey); \
+ b4 = asm_ncipher_be (b4, rkey); \
+ b5 = asm_ncipher_be (b5, rkey); \
+ b6 = asm_ncipher_be (b6, rkey); \
+ b7 = asm_ncipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+
+ rkeylf = asm_xor (rkeylast, rkey0);
+
+ DO_ROUND(8);
+
+ iv0 = asm_xor (rkeylf, iv0);
+ iv1 = asm_xor (rkeylf, iv1);
+ iv2 = asm_xor (rkeylf, iv2);
+ iv3 = asm_xor (rkeylf, iv3);
+ iv4 = asm_xor (rkeylf, iv4);
+ iv5 = asm_xor (rkeylf, iv5);
+ iv6 = asm_xor (rkeylf, iv6);
+ iv7 = asm_xor (rkeylf, iv7);
+
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ b0 = asm_ncipherlast_be (b0, iv0);
+ b1 = asm_ncipherlast_be (b1, iv1);
+ b2 = asm_ncipherlast_be (b2, iv2);
+ b3 = asm_ncipherlast_be (b3, iv3);
+ b4 = asm_ncipherlast_be (b4, iv4);
+ b5 = asm_ncipherlast_be (b5, iv5);
+ b6 = asm_ncipherlast_be (b6, iv6);
+ b7 = asm_ncipherlast_be (b7, iv7);
+
+ ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
+
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4 && (data_nblocks % 4) == 0)
+ {
+ b0 = VEC_LOAD_BE (in, 0, bige_const);
+ b1 = VEC_LOAD_BE (in, 1, bige_const);
+ b2 = VEC_LOAD_BE (in, 2, bige_const);
+ b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+ l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
+
+ iv ^= rkey0;
+
+ iv0 = iv ^ l0;
+ iv1 = iv ^ l0 ^ l1;
+ iv2 = iv ^ l1;
+ iv3 = iv ^ l1 ^ l;
+
+ b0 ^= iv0;
+ b1 ^= iv1;
+ b2 ^= iv2;
+ b3 ^= iv3;
+ iv = iv3 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_ncipher_be (b0, rkey); \
+ b1 = asm_ncipher_be (b1, rkey); \
+ b2 = asm_ncipher_be (b2, rkey); \
+ b3 = asm_ncipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast ^ rkey0;
+ b0 = asm_ncipherlast_be (b0, rkey ^ iv0);
+ b1 = asm_ncipherlast_be (b1, rkey ^ iv1);
+ b2 = asm_ncipherlast_be (b2, rkey ^ iv2);
+ b3 = asm_ncipherlast_be (b3, rkey ^ iv3);
+
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ ctr ^= b0 ^ b1 ^ b2 ^ b3;
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+ b = VEC_LOAD_BE (in, 0, bige_const);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ iv ^= l;
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+ b ^= iv;
+ AES_DECRYPT (b, rounds);
+ b ^= iv;
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ctr ^= b;
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in += 1;
+ out += 1;
+ }
+ }
+
+ VEC_STORE_BE (c->u_iv.iv, 0, iv, bige_const);
+ VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const);
+ c->u_mode.ocb.data_nblocks = data_nblocks;
+
+ return 0;
+}
+
+size_t OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks)
+{
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *abuf = (const u128_t *)abuf_arg;
+ int rounds = ctx->rounds;
+ u64 data_nblocks = c->u_mode.ocb.aad_nblocks;
+ block l0, l1, l2, l;
+ block b0, b1, b2, b3, b4, b5, b6, b7, b;
+ block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
+ block rkey, frkey;
+ block ctr, iv;
+ ROUND_KEY_VARIABLES;
+
+ iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, 0, bige_const);
+ ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, 0, bige_const);
+
+ l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const);
+ l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const);
+ l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const);
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
+ {
+ l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+ b = VEC_LOAD_BE (abuf, 0, bige_const);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ iv ^= l;
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ b ^= iv;
+ AES_ENCRYPT (b, rounds);
+ ctr ^= b;
+
+ abuf += 1;
+ }
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ b0 = VEC_LOAD_BE (abuf, 0, bige_const);
+ b1 = VEC_LOAD_BE (abuf, 1, bige_const);
+ b2 = VEC_LOAD_BE (abuf, 2, bige_const);
+ b3 = VEC_LOAD_BE (abuf, 3, bige_const);
+ b4 = VEC_LOAD_BE (abuf, 4, bige_const);
+ b5 = VEC_LOAD_BE (abuf, 5, bige_const);
+ b6 = VEC_LOAD_BE (abuf, 6, bige_const);
+ b7 = VEC_LOAD_BE (abuf, 7, bige_const);
+
+ l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), 0, bige_const);
+
+ frkey = rkey0;
+ iv ^= frkey;
+
+ iv0 = iv ^ l0;
+ iv1 = iv ^ l0 ^ l1;
+ iv2 = iv ^ l1;
+ iv3 = iv ^ l1 ^ l2;
+ iv4 = iv ^ l1 ^ l2 ^ l0;
+ iv5 = iv ^ l2 ^ l0;
+ iv6 = iv ^ l2;
+ iv7 = iv ^ l2 ^ l;
+
+ b0 ^= iv0;
+ b1 ^= iv1;
+ b2 ^= iv2;
+ b3 ^= iv3;
+ b4 ^= iv4;
+ b5 ^= iv5;
+ b6 ^= iv6;
+ b7 ^= iv7;
+ iv = iv7 ^ frkey;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey); \
+ b4 = asm_cipher_be (b4, rkey); \
+ b5 = asm_cipher_be (b5, rkey); \
+ b6 = asm_cipher_be (b6, rkey); \
+ b7 = asm_cipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = asm_cipherlast_be (b0, rkey);
+ b1 = asm_cipherlast_be (b1, rkey);
+ b2 = asm_cipherlast_be (b2, rkey);
+ b3 = asm_cipherlast_be (b3, rkey);
+ b4 = asm_cipherlast_be (b4, rkey);
+ b5 = asm_cipherlast_be (b5, rkey);
+ b6 = asm_cipherlast_be (b6, rkey);
+ b7 = asm_cipherlast_be (b7, rkey);
+
+ ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
+
+ abuf += 8;
+ }
+
+ if (nblocks >= 4 && (data_nblocks % 4) == 0)
+ {
+ b0 = VEC_LOAD_BE (abuf, 0, bige_const);
+ b1 = VEC_LOAD_BE (abuf, 1, bige_const);
+ b2 = VEC_LOAD_BE (abuf, 2, bige_const);
+ b3 = VEC_LOAD_BE (abuf, 3, bige_const);
+
+ l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
+
+ frkey = rkey0;
+ iv ^= frkey;
+
+ iv0 = iv ^ l0;
+ iv1 = iv ^ l0 ^ l1;
+ iv2 = iv ^ l1;
+ iv3 = iv ^ l1 ^ l;
+
+ b0 ^= iv0;
+ b1 ^= iv1;
+ b2 ^= iv2;
+ b3 ^= iv3;
+ iv = iv3 ^ frkey;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = asm_cipherlast_be (b0, rkey);
+ b1 = asm_cipherlast_be (b1, rkey);
+ b2 = asm_cipherlast_be (b2, rkey);
+ b3 = asm_cipherlast_be (b3, rkey);
+
+ ctr ^= b0 ^ b1 ^ b2 ^ b3;
+
+ abuf += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+ b = VEC_LOAD_BE (abuf, 0, bige_const);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ iv ^= l;
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ b ^= iv;
+ AES_ENCRYPT (b, rounds);
+ ctr ^= b;
+
+ abuf += 1;
+ }
+
+ VEC_STORE_BE (c->u_mode.ocb.aad_offset, 0, iv, bige_const);
+ VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const);
+ c->u_mode.ocb.aad_nblocks = data_nblocks;
+
+ return 0;
+}
+
+
+void XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int encrypt)
+{
+#ifdef WORDS_BIGENDIAN
+ static const block vec_bswap128_const =
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+#else
+ static const block vec_bswap128_const =
+ { ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8, ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0 };
+#endif
+ static const unsigned char vec_tweak_const[16] =
+ { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 };
+ static const vector unsigned long long vec_shift63_const =
+ { 63, 63 };
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ block tweak;
+ block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey, rkeylf;
+ block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7;
+ block tweak_const, bswap128_const, shift63_const;
+ ROUND_KEY_VARIABLES;
+
+ tweak_const = VEC_LOAD_BE (&vec_tweak_const, 0, bige_const);
+ bswap128_const = ALIGNED_LOAD (&vec_bswap128_const, 0);
+ shift63_const = ALIGNED_LOAD (&vec_shift63_const, 0);
+
+ tweak = VEC_LOAD_BE (tweak_arg, 0, bige_const);
+ tweak = asm_vperm1 (tweak, bswap128_const);
+
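+/* GEN_TWEAK doubles the tweak in GF(2^128) (multiply by x): each 64-bit half
+ * is doubled with vaddudm, the carry between halves is recovered from the
+ * swapped halves' sign bits, and 0x87 is XORed in when the top bit overflows,
+ * matching the XTS reduction polynomial x^128 + x^7 + x^2 + x + 1. */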
+#define GEN_TWEAK(tout, tin) /* Generate next tweak. */ \
+ do { \
+ block tmp1, tmp2; \
+ tmp1 = asm_swap_uint64_halfs(tin); \
+ tmp2 = asm_add_uint64(tin, tin); \
+ tmp1 = asm_sra_int64(tmp1, shift63_const) & tweak_const; \
+ tout = asm_xor(tmp1, tmp2); \
+ } while (0)
+
+ if (encrypt)
+ {
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+ b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+ b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+ b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+ tweak0 = tweak;
+ GEN_TWEAK (tweak1, tweak0);
+ tweak0 = asm_vperm1 (tweak0, bswap128_const);
+ b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+ b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+ GEN_TWEAK (tweak2, tweak1);
+ tweak1 = asm_vperm1 (tweak1, bswap128_const);
+ b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+ b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+
+ b0 = VEC_BE_SWAP(b0, bige_const);
+ b1 = VEC_BE_SWAP(b1, bige_const);
+ GEN_TWEAK (tweak3, tweak2);
+ tweak2 = asm_vperm1 (tweak2, bswap128_const);
+ GEN_TWEAK (tweak4, tweak3);
+ tweak3 = asm_vperm1 (tweak3, bswap128_const);
+ b2 = VEC_BE_SWAP(b2, bige_const);
+ b3 = VEC_BE_SWAP(b3, bige_const);
+ GEN_TWEAK (tweak5, tweak4);
+ tweak4 = asm_vperm1 (tweak4, bswap128_const);
+ GEN_TWEAK (tweak6, tweak5);
+ tweak5 = asm_vperm1 (tweak5, bswap128_const);
+ b4 = VEC_BE_SWAP(b4, bige_const);
+ b5 = VEC_BE_SWAP(b5, bige_const);
+ GEN_TWEAK (tweak7, tweak6);
+ tweak6 = asm_vperm1 (tweak6, bswap128_const);
+ GEN_TWEAK (tweak, tweak7);
+ tweak7 = asm_vperm1 (tweak7, bswap128_const);
+ b6 = VEC_BE_SWAP(b6, bige_const);
+ b7 = VEC_BE_SWAP(b7, bige_const);
+
+ tweak0 = asm_xor (tweak0, rkey0);
+ tweak1 = asm_xor (tweak1, rkey0);
+ tweak2 = asm_xor (tweak2, rkey0);
+ tweak3 = asm_xor (tweak3, rkey0);
+ tweak4 = asm_xor (tweak4, rkey0);
+ tweak5 = asm_xor (tweak5, rkey0);
+ tweak6 = asm_xor (tweak6, rkey0);
+ tweak7 = asm_xor (tweak7, rkey0);
+
+ b0 = asm_xor (b0, tweak0);
+ b1 = asm_xor (b1, tweak1);
+ b2 = asm_xor (b2, tweak2);
+ b3 = asm_xor (b3, tweak3);
+ b4 = asm_xor (b4, tweak4);
+ b5 = asm_xor (b5, tweak5);
+ b6 = asm_xor (b6, tweak6);
+ b7 = asm_xor (b7, tweak7);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey); \
+ b4 = asm_cipher_be (b4, rkey); \
+ b5 = asm_cipher_be (b5, rkey); \
+ b6 = asm_cipher_be (b6, rkey); \
+ b7 = asm_cipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+
+ rkeylf = asm_xor (rkeylast, rkey0);
+
+ DO_ROUND(8);
+
+ tweak0 = asm_xor (tweak0, rkeylf);
+ tweak1 = asm_xor (tweak1, rkeylf);
+ tweak2 = asm_xor (tweak2, rkeylf);
+ tweak3 = asm_xor (tweak3, rkeylf);
+ tweak4 = asm_xor (tweak4, rkeylf);
+ tweak5 = asm_xor (tweak5, rkeylf);
+ tweak6 = asm_xor (tweak6, rkeylf);
+ tweak7 = asm_xor (tweak7, rkeylf);
+
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ b0 = asm_cipherlast_be (b0, tweak0);
+ b1 = asm_cipherlast_be (b1, tweak1);
+ b2 = asm_cipherlast_be (b2, tweak2);
+ b3 = asm_cipherlast_be (b3, tweak3);
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b4 = asm_cipherlast_be (b4, tweak4);
+ b5 = asm_cipherlast_be (b5, tweak5);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b6 = asm_cipherlast_be (b6, tweak6);
+ b7 = asm_cipherlast_be (b7, tweak7);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ tweak0 = tweak;
+ GEN_TWEAK (tweak1, tweak0);
+ GEN_TWEAK (tweak2, tweak1);
+ GEN_TWEAK (tweak3, tweak2);
+ GEN_TWEAK (tweak, tweak3);
+
+ b0 = VEC_LOAD_BE (in, 0, bige_const);
+ b1 = VEC_LOAD_BE (in, 1, bige_const);
+ b2 = VEC_LOAD_BE (in, 2, bige_const);
+ b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+ tweak0 = asm_vperm1 (tweak0, bswap128_const);
+ tweak1 = asm_vperm1 (tweak1, bswap128_const);
+ tweak2 = asm_vperm1 (tweak2, bswap128_const);
+ tweak3 = asm_vperm1 (tweak3, bswap128_const);
+
+ b0 ^= tweak0 ^ rkey0;
+ b1 ^= tweak1 ^ rkey0;
+ b2 ^= tweak2 ^ rkey0;
+ b3 ^= tweak3 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = asm_cipherlast_be (b0, rkey ^ tweak0);
+ b1 = asm_cipherlast_be (b1, rkey ^ tweak1);
+ b2 = asm_cipherlast_be (b2, rkey ^ tweak2);
+ b3 = asm_cipherlast_be (b3, rkey ^ tweak3);
+
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ tweak0 = asm_vperm1 (tweak, bswap128_const);
+
+ /* Xor-Encrypt/Decrypt-Xor block. */
+ b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0;
+
+ /* Generate next tweak. */
+ GEN_TWEAK (tweak, tweak);
+
+ AES_ENCRYPT (b, rounds);
+
+ b ^= tweak0;
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in++;
+ out++;
+ }
+ }
+ else
+ {
+ const u128_t *rk = (u128_t *)&ctx->keyschdec;
+
+ if (!ctx->decryption_prepared)
+ {
+ internal_aes_ppc_prepare_decryption (ctx);
+ ctx->decryption_prepared = 1;
+ }
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+ b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+ b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+ b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+ tweak0 = tweak;
+ GEN_TWEAK (tweak1, tweak0);
+ tweak0 = asm_vperm1 (tweak0, bswap128_const);
+ b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+ b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+ GEN_TWEAK (tweak2, tweak1);
+ tweak1 = asm_vperm1 (tweak1, bswap128_const);
+ b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+ b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+
+ b0 = VEC_BE_SWAP(b0, bige_const);
+ b1 = VEC_BE_SWAP(b1, bige_const);
+ GEN_TWEAK (tweak3, tweak2);
+ tweak2 = asm_vperm1 (tweak2, bswap128_const);
+ GEN_TWEAK (tweak4, tweak3);
+ tweak3 = asm_vperm1 (tweak3, bswap128_const);
+ b2 = VEC_BE_SWAP(b2, bige_const);
+ b3 = VEC_BE_SWAP(b3, bige_const);
+ GEN_TWEAK (tweak5, tweak4);
+ tweak4 = asm_vperm1 (tweak4, bswap128_const);
+ GEN_TWEAK (tweak6, tweak5);
+ tweak5 = asm_vperm1 (tweak5, bswap128_const);
+ b4 = VEC_BE_SWAP(b4, bige_const);
+ b5 = VEC_BE_SWAP(b5, bige_const);
+ GEN_TWEAK (tweak7, tweak6);
+ tweak6 = asm_vperm1 (tweak6, bswap128_const);
+ GEN_TWEAK (tweak, tweak7);
+ tweak7 = asm_vperm1 (tweak7, bswap128_const);
+ b6 = VEC_BE_SWAP(b6, bige_const);
+ b7 = VEC_BE_SWAP(b7, bige_const);
+
+ tweak0 = asm_xor (tweak0, rkey0);
+ tweak1 = asm_xor (tweak1, rkey0);
+ tweak2 = asm_xor (tweak2, rkey0);
+ tweak3 = asm_xor (tweak3, rkey0);
+ tweak4 = asm_xor (tweak4, rkey0);
+ tweak5 = asm_xor (tweak5, rkey0);
+ tweak6 = asm_xor (tweak6, rkey0);
+ tweak7 = asm_xor (tweak7, rkey0);
+
+ b0 = asm_xor (b0, tweak0);
+ b1 = asm_xor (b1, tweak1);
+ b2 = asm_xor (b2, tweak2);
+ b3 = asm_xor (b3, tweak3);
+ b4 = asm_xor (b4, tweak4);
+ b5 = asm_xor (b5, tweak5);
+ b6 = asm_xor (b6, tweak6);
+ b7 = asm_xor (b7, tweak7);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_ncipher_be (b0, rkey); \
+ b1 = asm_ncipher_be (b1, rkey); \
+ b2 = asm_ncipher_be (b2, rkey); \
+ b3 = asm_ncipher_be (b3, rkey); \
+ b4 = asm_ncipher_be (b4, rkey); \
+ b5 = asm_ncipher_be (b5, rkey); \
+ b6 = asm_ncipher_be (b6, rkey); \
+ b7 = asm_ncipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+
+ rkeylf = asm_xor (rkeylast, rkey0);
+
+ DO_ROUND(8);
+
+ tweak0 = asm_xor (tweak0, rkeylf);
+ tweak1 = asm_xor (tweak1, rkeylf);
+ tweak2 = asm_xor (tweak2, rkeylf);
+ tweak3 = asm_xor (tweak3, rkeylf);
+ tweak4 = asm_xor (tweak4, rkeylf);
+ tweak5 = asm_xor (tweak5, rkeylf);
+ tweak6 = asm_xor (tweak6, rkeylf);
+ tweak7 = asm_xor (tweak7, rkeylf);
+
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ b0 = asm_ncipherlast_be (b0, tweak0);
+ b1 = asm_ncipherlast_be (b1, tweak1);
+ b2 = asm_ncipherlast_be (b2, tweak2);
+ b3 = asm_ncipherlast_be (b3, tweak3);
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b4 = asm_ncipherlast_be (b4, tweak4);
+ b5 = asm_ncipherlast_be (b5, tweak5);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b6 = asm_ncipherlast_be (b6, tweak6);
+ b7 = asm_ncipherlast_be (b7, tweak7);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ tweak0 = tweak;
+ GEN_TWEAK (tweak1, tweak0);
+ GEN_TWEAK (tweak2, tweak1);
+ GEN_TWEAK (tweak3, tweak2);
+ GEN_TWEAK (tweak, tweak3);
+
+ b0 = VEC_LOAD_BE (in, 0, bige_const);
+ b1 = VEC_LOAD_BE (in, 1, bige_const);
+ b2 = VEC_LOAD_BE (in, 2, bige_const);
+ b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+ tweak0 = asm_vperm1 (tweak0, bswap128_const);
+ tweak1 = asm_vperm1 (tweak1, bswap128_const);
+ tweak2 = asm_vperm1 (tweak2, bswap128_const);
+ tweak3 = asm_vperm1 (tweak3, bswap128_const);
+
+ b0 ^= tweak0 ^ rkey0;
+ b1 ^= tweak1 ^ rkey0;
+ b2 ^= tweak2 ^ rkey0;
+ b3 ^= tweak3 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_ncipher_be (b0, rkey); \
+ b1 = asm_ncipher_be (b1, rkey); \
+ b2 = asm_ncipher_be (b2, rkey); \
+ b3 = asm_ncipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = asm_ncipherlast_be (b0, rkey ^ tweak0);
+ b1 = asm_ncipherlast_be (b1, rkey ^ tweak1);
+ b2 = asm_ncipherlast_be (b2, rkey ^ tweak2);
+ b3 = asm_ncipherlast_be (b3, rkey ^ tweak3);
+
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ tweak0 = asm_vperm1 (tweak, bswap128_const);
+
+ /* Xor-Encrypt/Decrypt-Xor block. */
+ b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0;
+
+ /* Generate next tweak. */
+ GEN_TWEAK (tweak, tweak);
+
+ AES_DECRYPT (b, rounds);
+
+ b ^= tweak0;
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in++;
+ out++;
+ }
+ }
+
+ tweak = asm_vperm1 (tweak, bswap128_const);
+ VEC_STORE_BE (tweak_arg, 0, tweak, bige_const);
+
+#undef GEN_TWEAK
+}
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-ppc.c b/comm/third_party/libgcrypt/cipher/rijndael-ppc.c
new file mode 100644
index 0000000000..f5c3236111
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-ppc.c
@@ -0,0 +1,259 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+#include <config.h>
+
+#include "rijndael-internal.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+#ifdef USE_PPC_CRYPTO
+
+#include "rijndael-ppc-common.h"
+
+
+#ifdef WORDS_BIGENDIAN
+static const block vec_bswap32_const =
+ { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+#else
+static const block vec_bswap32_const_neg =
+ { ~3, ~2, ~1, ~0, ~7, ~6, ~5, ~4, ~11, ~10, ~9, ~8, ~15, ~14, ~13, ~12 };
+#endif
+
+
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_const(void)
+{
+#ifndef WORDS_BIGENDIAN
+ return ALIGNED_LOAD (&vec_bswap32_const_neg, 0);
+#else
+ static const block vec_dummy = { 0 };
+ return vec_dummy;
+#endif
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_be_swap(block vec, block be_bswap_const)
+{
+ (void)be_bswap_const;
+#ifndef WORDS_BIGENDIAN
+ return asm_vperm1 (vec, be_bswap_const);
+#else
+ return vec;
+#endif
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_noswap(unsigned long offset, const void *ptr)
+{
+ block vec;
+#if __GNUC__ >= 4
+ if (__builtin_constant_p (offset) && offset == 0)
+ __asm__ volatile ("lxvw4x %x0,0,%1\n\t"
+ : "=wa" (vec)
+ : "r" ((uintptr_t)ptr)
+ : "memory");
+ else
+#endif
+ __asm__ volatile ("lxvw4x %x0,%1,%2\n\t"
+ : "=wa" (vec)
+ : "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+ /* NOTE: vec needs to be be-swapped using 'asm_be_swap' by caller */
+ return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE void
+asm_store_be_noswap(block vec, unsigned long offset, void *ptr)
+{
+  /* NOTE: vec must already be be-swapped by the caller using 'asm_be_swap'. */
+#if __GNUC__ >= 4
+ if (__builtin_constant_p (offset) && offset == 0)
+ __asm__ volatile ("stxvw4x %x0,0,%1\n\t"
+ :
+ : "wa" (vec), "r" ((uintptr_t)ptr)
+ : "memory");
+ else
+#endif
+ __asm__ volatile ("stxvw4x %x0,%1,%2\n\t"
+ :
+ : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+}
+
+
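+/* Run the AES SubBytes transformation (vec_sbox_be/vsbox) over a vector
+ * and return the transformed first 32-bit word; used by the key schedule
+ * expansion below. */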
+static ASM_FUNC_ATTR_INLINE u32
+_gcry_aes_sbox4_ppc8(u32 fourbytes)
+{
+ union
+ {
+ PROPERLY_ALIGNED_TYPE dummy;
+ block data_vec;
+ u32 data32[4];
+ } u;
+
+ u.data32[0] = fourbytes;
+ u.data_vec = vec_sbox_be(u.data_vec);
+ return u.data32[0];
+}
+
+void
+_gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
+{
+ const block bige_const = asm_load_be_const();
+ union
+ {
+ PROPERLY_ALIGNED_TYPE dummy;
+ byte data[MAXKC][4];
+ u32 data32[MAXKC];
+ } tkk[2];
+ unsigned int rounds = ctx->rounds;
+ int KC = rounds - 6;
+ unsigned int keylen = KC * 4;
+ u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
+ unsigned int i, r, t;
+ byte rcon = 1;
+ int j;
+#define k tkk[0].data
+#define k_u32 tkk[0].data32
+#define tk tkk[1].data
+#define tk_u32 tkk[1].data32
+#define W (ctx->keyschenc)
+#define W_u32 (ctx->keyschenc32)
+
+ for (i = 0; i < keylen; i++)
+ {
+ k[i >> 2][i & 3] = key[i];
+ }
+
+ for (j = KC-1; j >= 0; j--)
+ {
+ tk_u32[j] = k_u32[j];
+ }
+ r = 0;
+ t = 0;
+ /* Copy values into round key array. */
+ for (j = 0; (j < KC) && (r < rounds + 1); )
+ {
+ for (; (j < KC) && (t < 4); j++, t++)
+ {
+ W_u32[r][t] = le_bswap32(tk_u32[j]);
+ }
+ if (t == 4)
+ {
+ r++;
+ t = 0;
+ }
+ }
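+  /* Standard AES key expansion: each pass derives KC more key words
+   * (rotate/SubWord/rcon for the first word, plus the extra SubWord step
+   * for 256-bit keys) and copies completed 4-word round keys into W. */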
+ while (r < rounds + 1)
+ {
+ tk_u32[0] ^=
+ le_bswap32(
+ _gcry_aes_sbox4_ppc8(rol(le_bswap32(tk_u32[KC - 1]), 24)) ^ rcon);
+
+ if (KC != 8)
+ {
+ for (j = 1; j < KC; j++)
+ {
+ tk_u32[j] ^= tk_u32[j-1];
+ }
+ }
+ else
+ {
+ for (j = 1; j < KC/2; j++)
+ {
+ tk_u32[j] ^= tk_u32[j-1];
+ }
+
+ tk_u32[KC/2] ^=
+ le_bswap32(_gcry_aes_sbox4_ppc8(le_bswap32(tk_u32[KC/2 - 1])));
+
+ for (j = KC/2 + 1; j < KC; j++)
+ {
+ tk_u32[j] ^= tk_u32[j-1];
+ }
+ }
+
+ /* Copy values into round key array. */
+ for (j = 0; (j < KC) && (r < rounds + 1); )
+ {
+ for (; (j < KC) && (t < 4); j++, t++)
+ {
+ W_u32[r][t] = le_bswap32(tk_u32[j]);
+ }
+ if (t == 4)
+ {
+ r++;
+ t = 0;
+ }
+ }
+
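+      /* Advance rcon: multiply by x in GF(2^8) (xtime). */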
+ rcon = (rcon << 1) ^ (-(rcon >> 7) & 0x1b);
+ }
+
+ /* Store in big-endian order. */
+ for (r = 0; r <= rounds; r++)
+ {
+#ifndef WORDS_BIGENDIAN
+ VEC_STORE_BE(ekey, r, ALIGNED_LOAD (ekey, r), bige_const);
+#else
+ block rvec = ALIGNED_LOAD (ekey, r);
+ ALIGNED_STORE (ekey, r,
+ vec_perm(rvec, rvec, vec_bswap32_const));
+ (void)bige_const;
+#endif
+ }
+
+#undef W
+#undef tk
+#undef k
+#undef W_u32
+#undef tk_u32
+#undef k_u32
+ wipememory(&tkk, sizeof(tkk));
+}
+
+void
+_gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx)
+{
+ internal_aes_ppc_prepare_decryption (ctx);
+}
+
+
+#define GCRY_AES_PPC8 1
+#define ENCRYPT_BLOCK_FUNC _gcry_aes_ppc8_encrypt
+#define DECRYPT_BLOCK_FUNC _gcry_aes_ppc8_decrypt
+#define CFB_ENC_FUNC _gcry_aes_ppc8_cfb_enc
+#define CFB_DEC_FUNC _gcry_aes_ppc8_cfb_dec
+#define CBC_ENC_FUNC _gcry_aes_ppc8_cbc_enc
+#define CBC_DEC_FUNC _gcry_aes_ppc8_cbc_dec
+#define CTR_ENC_FUNC _gcry_aes_ppc8_ctr_enc
+#define OCB_CRYPT_FUNC _gcry_aes_ppc8_ocb_crypt
+#define OCB_AUTH_FUNC _gcry_aes_ppc8_ocb_auth
+#define XTS_CRYPT_FUNC _gcry_aes_ppc8_xts_crypt
+
+#include <rijndael-ppc-functions.h>
+
+#endif /* USE_PPC_CRYPTO */
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-ppc9le.c b/comm/third_party/libgcrypt/cipher/rijndael-ppc9le.c
new file mode 100644
index 0000000000..facdedd4f2
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-ppc9le.c
@@ -0,0 +1,102 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+#include <config.h>
+
+#include "rijndael-internal.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+
+#include "rijndael-ppc-common.h"
+
+
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_const(void)
+{
+ static const block vec_dummy = { 0 };
+ return vec_dummy;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_be_swap(block vec, block be_bswap_const)
+{
+ (void)be_bswap_const;
+ return vec;
+}
+
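+/* On POWER9 little-endian, lxvb16x/stxvb16x access the 16 bytes in
+ * big-endian element order directly, so no separate byte swap is needed
+ * (asm_be_swap above is therefore a no-op). */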
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_noswap(unsigned long offset, const void *ptr)
+{
+ block vec;
+#if __GNUC__ >= 4
+ if (__builtin_constant_p (offset) && offset == 0)
+ __asm__ volatile ("lxvb16x %x0,0,%1\n\t"
+ : "=wa" (vec)
+ : "r" ((uintptr_t)ptr)
+ : "memory");
+ else
+#endif
+ __asm__ volatile ("lxvb16x %x0,%1,%2\n\t"
+ : "=wa" (vec)
+ : "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+ return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE void
+asm_store_be_noswap(block vec, unsigned long offset, void *ptr)
+{
+#if __GNUC__ >= 4
+ if (__builtin_constant_p (offset) && offset == 0)
+ __asm__ volatile ("stxvb16x %x0,0,%1\n\t"
+ :
+ : "wa" (vec), "r" ((uintptr_t)ptr)
+ : "memory");
+ else
+#endif
+ __asm__ volatile ("stxvb16x %x0,%1,%2\n\t"
+ :
+ : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+}
+
+
+#define GCRY_AES_PPC9LE 1
+#define ENCRYPT_BLOCK_FUNC _gcry_aes_ppc9le_encrypt
+#define DECRYPT_BLOCK_FUNC _gcry_aes_ppc9le_decrypt
+#define CFB_ENC_FUNC _gcry_aes_ppc9le_cfb_enc
+#define CFB_DEC_FUNC _gcry_aes_ppc9le_cfb_dec
+#define CBC_ENC_FUNC _gcry_aes_ppc9le_cbc_enc
+#define CBC_DEC_FUNC _gcry_aes_ppc9le_cbc_dec
+#define CTR_ENC_FUNC _gcry_aes_ppc9le_ctr_enc
+#define OCB_CRYPT_FUNC _gcry_aes_ppc9le_ocb_crypt
+#define OCB_AUTH_FUNC _gcry_aes_ppc9le_ocb_auth
+#define XTS_CRYPT_FUNC _gcry_aes_ppc9le_xts_crypt
+
+#include <rijndael-ppc-functions.h>
+
+#endif /* USE_PPC_CRYPTO_WITH_PPC9LE */
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-s390x.c b/comm/third_party/libgcrypt/cipher/rijndael-s390x.c
new file mode 100644
index 0000000000..aea65c5a3d
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-s390x.c
@@ -0,0 +1,1155 @@
+/* Rijndael (AES) for GnuPG - s390x/zSeries AES implementation
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "rijndael-internal.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+#ifdef USE_S390X_CRYPTO
+
+#include "asm-inline-s390x.h"
+
+#define NO_INLINE __attribute__((noinline))
+
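+/* Parameter block for the KMA (cipher message with authentication)
+ * instruction; the key field is sized for the largest (AES-256) key. */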
+struct aes_s390x_gcm_params_s
+{
+ u32 reserved[3];
+ u32 counter_value;
+ u64 tag[2];
+ u64 hash_subkey[2];
+ u64 total_aad_length;
+ u64 total_cipher_length;
+ u32 initial_counter_value[4];
+ u64 key[4];
+};
+
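+/* Generate query/execute wrappers for the CPACF instructions (KM, KMC,
+ * KMAC, KMF, KMO).  The query form returns the bit mask of supported
+ * function codes; the execute form runs the instruction, retrying via
+ * 'brc 1,0b' while it ends with condition code 3 (partial completion). */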
+#define DECL_QUERY_FUNC(instruction, opcode) \
+ static u128_t instruction ##_query(void) \
+ { \
+ static u128_t function_codes = 0; \
+ static int initialized = 0; \
+ register unsigned long reg0 asm("0") = 0; \
+ register void *reg1 asm("1") = &function_codes; \
+ u128_t r1, r2; \
+ \
+ if (initialized) \
+ return function_codes; \
+ \
+ asm volatile ("0: .insn rre," #opcode " << 16, %[r1], %[r2]\n\t" \
+ " brc 1,0b\n\t" \
+ : [r1] "=a" (r1), [r2] "=a" (r2) \
+ : [reg0] "r" (reg0), [reg1] "r" (reg1) \
+ : "cc", "memory"); \
+ \
+ initialized = 1; \
+ return function_codes; \
+ }
+
+#define DECL_EXECUTE_FUNC(instruction, opcode, param_const) \
+ static ALWAYS_INLINE size_t \
+ instruction ##_execute(unsigned int func, param_const void *param_block, \
+ void *dst, const void *src, size_t src_len) \
+ { \
+ register unsigned long reg0 asm("0") = func; \
+ register param_const byte *reg1 asm("1") = param_block; \
+ u128_t r1 = ((u128_t)(uintptr_t)dst << 64); \
+ u128_t r2 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len; \
+ \
+ asm volatile ("0: .insn rre," #opcode " << 16, %[r1], %[r2]\n\t" \
+ " brc 1,0b\n\t" \
+ : [r1] "+a" (r1), [r2] "+a" (r2) \
+ : [func] "r" (reg0), [param_ptr] "r" (reg1) \
+ : "cc", "memory"); \
+ \
+ return (u64)r2; \
+ }
+
+DECL_QUERY_FUNC(km, 0xb92e);
+DECL_QUERY_FUNC(kmc, 0xb92f);
+DECL_QUERY_FUNC(kmac, 0xb91e);
+DECL_QUERY_FUNC(kmf, 0xb92a);
+DECL_QUERY_FUNC(kmo, 0xb92b);
+
+DECL_EXECUTE_FUNC(km, 0xb92e, const);
+DECL_EXECUTE_FUNC(kmc, 0xb92f, );
+DECL_EXECUTE_FUNC(kmac, 0xb91e, );
+DECL_EXECUTE_FUNC(kmf, 0xb92a, );
+DECL_EXECUTE_FUNC(kmo, 0xb92b, );
+
+static u128_t kma_query(void)
+{
+ static u128_t function_codes = 0;
+ static int initialized = 0;
+ register unsigned long reg0 asm("0") = 0;
+ register void *reg1 asm("1") = &function_codes;
+ u128_t r1, r2, r3;
+
+ if (initialized)
+ return function_codes;
+
+ asm volatile ("0: .insn rrf,0xb929 << 16, %[r1], %[r2], %[r3], 0\n\t"
+ " brc 1,0b\n\t"
+ : [r1] "=a" (r1), [r2] "=a" (r2), [r3] "=a" (r3)
+ : [reg0] "r" (reg0), [reg1] "r" (reg1)
+ : "cc", "memory");
+
+ initialized = 1;
+ return function_codes;
+}
+
+static ALWAYS_INLINE void
+kma_execute(unsigned int func, void *param_block, byte *dst, const byte *src,
+ size_t src_len, const byte *aad, size_t aad_len)
+{
+ register unsigned long reg0 asm("0") = func;
+ register byte *reg1 asm("1") = param_block;
+ u128_t r1 = ((u128_t)(uintptr_t)dst << 64);
+ u128_t r2 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len;
+ u128_t r3 = ((u128_t)(uintptr_t)aad << 64) | (u64)aad_len;
+
+ asm volatile ("0: .insn rrf,0xb929 << 16, %[r1], %[r2], %[r3], 0\n\t"
+ " brc 1,0b\n\t"
+ : [r1] "+a" (r1), [r2] "+a" (r2), [r3] "+a" (r3),
+ [func] "+r" (reg0)
+ : [param_ptr] "r" (reg1)
+ : "cc", "memory");
+}
+
+unsigned int _gcry_aes_s390x_encrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src)
+{
+ km_execute (ctx->km_func | KM_ENCRYPT, ctx->keyschenc, dst, src,
+ BLOCKSIZE);
+ return 0;
+}
+
+unsigned int _gcry_aes_s390x_decrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src)
+{
+ km_execute (ctx->km_func | KM_DECRYPT, ctx->keyschenc, dst, src,
+ BLOCKSIZE);
+ return 0;
+}
+
+static void aes_s390x_cbc_enc(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int cbc_mac)
+{
+ RIJNDAEL_context *ctx = context;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ u128_t params[3];
+
+ /* Prepare parameter block. */
+ memcpy (&params[0], iv, BLOCKSIZE);
+ memcpy (&params[1], ctx->keyschenc, 32);
+
+ if (cbc_mac)
+ {
+ kmac_execute (ctx->kmac_func | KM_ENCRYPT, &params, NULL, in,
+ nblocks * BLOCKSIZE);
+ memcpy (out, &params[0], BLOCKSIZE);
+ }
+ else
+ {
+ kmc_execute (ctx->kmc_func | KM_ENCRYPT, &params, out, in,
+ nblocks * BLOCKSIZE);
+ }
+
+ /* Update IV with OCV. */
+ memcpy (iv, &params[0], BLOCKSIZE);
+
+ wipememory (&params, sizeof(params));
+}
+
+static void aes_s390x_cbc_dec(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ u128_t params[3];
+
+ /* Prepare parameter block (ICV & key). */
+ memcpy (&params[0], iv, BLOCKSIZE);
+ memcpy (&params[1], ctx->keyschenc, 32);
+
+ kmc_execute (ctx->kmc_func | KM_DECRYPT, &params, out, in,
+ nblocks * BLOCKSIZE);
+
+ /* Update IV with OCV. */
+ memcpy (iv, &params[0], BLOCKSIZE);
+
+ wipememory (&params, sizeof(params));
+}
+
+static void aes_s390x_cfb128_enc(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ unsigned int function;
+ u128_t params[3];
+
+ /* Prepare parameter block. */
+ memcpy (&params[0], iv, BLOCKSIZE);
+ memcpy (&params[1], ctx->keyschenc, 32);
+
+ function = ctx->kmf_func | KM_ENCRYPT | KMF_LCFB_16;
+ kmf_execute (function, &params, out, in, nblocks * BLOCKSIZE);
+
+ /* Update IV with OCV. */
+ memcpy (iv, &params[0], BLOCKSIZE);
+
+ wipememory (&params, sizeof(params));
+}
+
+static void aes_s390x_cfb128_dec(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ u128_t blocks[64];
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ size_t max_blocks_used = 0;
+
+  /* AES128-CFB128 decryption speed using KMF was observed to be the same as
+   * KMF encryption, ~1.03 cpb. The expectation was to see performance similar
+   * to AES128-CBC decryption, as decryption in both modes should be
+   * parallelizable (CBC shows ~0.22 cpb). There is therefore quite a bit of
+   * room for improvement, and the implementation below using the KM
+   * instruction achieves ~0.70 cpb, a ~30% improvement over KMF.
+   */
+
+ while (nblocks >= 64)
+ {
+ /* Copy IV to encrypt buffer, copy (nblocks - 1) input blocks to
+ * encrypt buffer and update IV. */
+ asm volatile ("mvc 0(16, %[blocks]), 0(%[iv])\n\t"
+ "mvc 16(240, %[blocks]), 0(%[in])\n\t"
+ "mvc 256(256, %[blocks]), 240(%[in])\n\t"
+ "mvc 512(256, %[blocks]), 496(%[in])\n\t"
+ "mvc 768(256, %[blocks]), 752(%[in])\n\t"
+ "mvc 0(16, %[iv]), 1008(%[in])\n\t"
+ :
+ : [in] "a" (in), [out] "a" (out), [blocks] "a" (blocks),
+ [iv] "a" (iv)
+ : "memory");
+
+ /* Perform encryption of temporary buffer. */
+ km_execute (ctx->km_func | KM_ENCRYPT, ctx->keyschenc, blocks, blocks,
+ 64 * BLOCKSIZE);
+
+ /* Xor encrypt buffer with input blocks and store to output blocks. */
+ asm volatile ("xc 0(256, %[blocks]), 0(%[in])\n\t"
+ "xc 256(256, %[blocks]), 256(%[in])\n\t"
+ "xc 512(256, %[blocks]), 512(%[in])\n\t"
+ "xc 768(256, %[blocks]), 768(%[in])\n\t"
+ "mvc 0(256, %[out]), 0(%[blocks])\n\t"
+ "mvc 256(256, %[out]), 256(%[blocks])\n\t"
+ "mvc 512(256, %[out]), 512(%[blocks])\n\t"
+ "mvc 768(256, %[out]), 768(%[blocks])\n\t"
+ :
+ : [in] "a" (in), [out] "a" (out), [blocks] "a" (blocks)
+ : "memory");
+
+ max_blocks_used = 64;
+ in += 64 * BLOCKSIZE;
+ out += 64 * BLOCKSIZE;
+ nblocks -= 64;
+ }
+
+ if (nblocks)
+ {
+ unsigned int pos = 0;
+ size_t in_nblocks = nblocks;
+ size_t num_in = 0;
+
+ max_blocks_used = max_blocks_used < nblocks ? nblocks : max_blocks_used;
+
+ /* Copy IV to encrypt buffer. */
+ asm volatile ("mvc 0(16, %[blocks]), 0(%[iv])\n\t"
+ :
+ : [blocks] "a" (blocks), [iv] "a" (iv)
+ : "memory");
+ pos += 1;
+
+#define CFB_MOVE_BLOCKS(block_oper, move_nbytes) \
+ block_oper (in_nblocks - 1 >= move_nbytes / BLOCKSIZE) \
+ { \
+ unsigned int move_nblocks = move_nbytes / BLOCKSIZE; \
+ asm volatile ("mvc 0(" #move_nbytes ", %[blocks_x]), 0(%[in])\n\t" \
+ : \
+ : [blocks_x] "a" (&blocks[pos]), [in] "a" (in) \
+ : "memory"); \
+ num_in += move_nblocks; \
+ in += move_nblocks * BLOCKSIZE; \
+ pos += move_nblocks; \
+ in_nblocks -= move_nblocks; \
+ }
+
+ /* Copy (nblocks - 1) input blocks to encrypt buffer. */
+ CFB_MOVE_BLOCKS(while, 256);
+ CFB_MOVE_BLOCKS(if, 128);
+ CFB_MOVE_BLOCKS(if, 64);
+ CFB_MOVE_BLOCKS(if, 32);
+ CFB_MOVE_BLOCKS(if, 16);
+
+#undef CFB_MOVE_BLOCKS
+
+ /* Update IV. */
+ asm volatile ("mvc 0(16, %[iv]), 0(%[in])\n\t"
+ :
+ : [iv] "a" (iv), [in] "a" (in)
+ : "memory");
+ num_in += 1;
+ in += BLOCKSIZE;
+
+ /* Perform encryption of temporary buffer. */
+ km_execute (ctx->km_func | KM_ENCRYPT, ctx->keyschenc, blocks, blocks,
+ nblocks * BLOCKSIZE);
+
+ /* Xor encrypt buffer with input blocks and store to output blocks. */
+ pos = 0;
+ in -= nblocks * BLOCKSIZE;
+
+#define CFB_XOR_BLOCKS(block_oper, xor_nbytes) \
+ block_oper (nblocks >= xor_nbytes / BLOCKSIZE) \
+ { \
+ unsigned int xor_nblocks = xor_nbytes / BLOCKSIZE; \
+ asm volatile ("xc 0(" #xor_nbytes ", %[blocks_x]), 0(%[in])\n\t" \
+ "mvc 0(" #xor_nbytes ", %[out]), 0(%[blocks_x])\n\t" \
+ : \
+ : [blocks_x] "a" (&blocks[pos]), [out] "a" (out), \
+ [in] "a" (in) \
+ : "memory"); \
+ out += xor_nblocks * BLOCKSIZE; \
+ in += xor_nblocks * BLOCKSIZE; \
+ nblocks -= xor_nblocks; \
+ pos += xor_nblocks; \
+ }
+
+ CFB_XOR_BLOCKS(while, 256);
+ CFB_XOR_BLOCKS(if, 128);
+ CFB_XOR_BLOCKS(if, 64);
+ CFB_XOR_BLOCKS(if, 32);
+ CFB_XOR_BLOCKS(if, 16);
+
+#undef CFB_XOR_BLOCKS
+ }
+
+ if (max_blocks_used)
+ wipememory (&blocks, max_blocks_used * BLOCKSIZE);
+}
+
+static void aes_s390x_ofb_enc(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ unsigned int function;
+ u128_t params[3];
+
+ /* Prepare parameter block. */
+ memcpy (&params[0], iv, BLOCKSIZE);
+ memcpy (&params[1], ctx->keyschenc, 32);
+
+ function = ctx->kmo_func | KM_ENCRYPT;
+ kmo_execute (function, &params, out, in, nblocks * BLOCKSIZE);
+
+ /* Update IV with OCV. */
+ memcpy (iv, &params[0], BLOCKSIZE);
+
+ wipememory (&params, sizeof(params));
+}
+
+static void aes_s390x_ctr128_enc(void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ unsigned int function;
+ struct aes_s390x_gcm_params_s params;
+
+ memset (&params.hash_subkey, 0, sizeof(params.hash_subkey));
+ memcpy (&params.key, ctx->keyschenc, 32);
+
+ function = ctx->kma_func | KM_DECRYPT | KMA_HS | KMA_LAAD;
+
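+  /* KMA only increments the low 32 bits of the counter, so process at
+   * most 'to_overflow' blocks per iteration and propagate the carry into
+   * the upper counter words with cipher_block_add. */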
+ while (nblocks)
+ {
+ u64 to_overflow = (u64)0xFFFFFFFFU + 1 - buf_get_be32 (ctr + 12);
+ u64 ncurr = nblocks > to_overflow ? to_overflow : nblocks;
+
+ /* Prepare parameter block. */
+ memset (&params.reserved, 0, sizeof(params.reserved));
+ buf_put_be32 (&params.counter_value, buf_get_be32(ctr + 12) - 1);
+ memcpy (&params.initial_counter_value, ctr, 16);
+ params.initial_counter_value[3] = params.counter_value;
+ memset (&params.tag, 0, sizeof(params.tag));
+ params.total_aad_length = 0;
+ params.total_cipher_length = 0;
+
+ /* Update counter. */
+ cipher_block_add (ctr, ncurr, BLOCKSIZE);
+ if (ncurr == (u64)0xFFFFFFFFU + 1)
+ cipher_block_add (ctr, 1, BLOCKSIZE);
+
+ /* Perform CTR using KMA-GCM. */
+ kma_execute (function, &params, out, in, ncurr * BLOCKSIZE, NULL, 0);
+
+ out += ncurr * BLOCKSIZE;
+ in += ncurr * BLOCKSIZE;
+ nblocks -= ncurr;
+ }
+
+ wipememory (&params, sizeof(params));
+}
+
+static size_t aes_s390x_gcm_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ byte *ctr = c->u_ctr.ctr;
+ unsigned int function;
+ struct aes_s390x_gcm_params_s params;
+
+ function = ctx->kma_func | (encrypt ? KM_ENCRYPT : KM_DECRYPT)
+ | KMA_HS | KMA_LAAD;
+
+ /* Prepare parameter block. */
+ memset (&params.reserved, 0, sizeof(params.reserved));
+ buf_put_be32 (&params.counter_value, buf_get_be32(ctr + 12) - 1);
+ memcpy (&params.tag, c->u_mode.gcm.u_tag.tag, 16);
+ memcpy (&params.hash_subkey, c->u_mode.gcm.u_ghash_key.key, 16);
+ params.total_aad_length = 0;
+ params.total_cipher_length = 0;
+ memcpy (&params.initial_counter_value, ctr, 12);
+ params.initial_counter_value[3] = params.counter_value;
+ memcpy (&params.key, ctx->keyschenc, 32);
+
+ /* Update counter (CTR32). */
+ buf_put_be32(ctr + 12, buf_get_be32(ctr + 12) + nblocks);
+
+ /* Perform KMA-GCM. */
+ kma_execute (function, &params, out, in, nblocks * BLOCKSIZE, NULL, 0);
+
+ /* Update tag. */
+ memcpy (c->u_mode.gcm.u_tag.tag, &params.tag, 16);
+
+ wipememory (&params, sizeof(params));
+
+ return 0;
+}
+
+static void aes_s390x_xts_crypt(void *context, unsigned char *tweak,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int encrypt)
+{
+ RIJNDAEL_context *ctx = context;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ unsigned int function;
+ u128_t params[3];
+ u128_t *params_tweak;
+
+ if (ctx->rounds < 12)
+ {
+ memcpy (&params[0], ctx->keyschenc, 16);
+ params_tweak = &params[1];
+ memcpy (params_tweak, tweak, BLOCKSIZE);
+ }
+ else if (ctx->rounds == 12)
+ {
+ BUG(); /* KM-XTS-AES-192 not defined. */
+ }
+ else
+ {
+ memcpy (&params[0], ctx->keyschenc, 32);
+ params_tweak = &params[2];
+ memcpy (params_tweak, tweak, BLOCKSIZE);
+ }
+
+ function = ctx->km_func_xts | (encrypt ? KM_ENCRYPT : KM_DECRYPT);
+ km_execute (function, &params, out, in, nblocks * BLOCKSIZE);
+
+ /* Update tweak with XTSP. */
+ memcpy (tweak, params_tweak, BLOCKSIZE);
+
+ wipememory (&params, sizeof(params));
+}
+
+static NO_INLINE void
+aes_s390x_ocb_prepare_Ls (gcry_cipher_hd_t c, u64 blkn, const void *Ls[64],
+ const void ***pl)
+{
+ unsigned int n = 64 - (blkn % 64);
+ int i;
+
+ /* Prepare L pointers. */
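+  /* The table holds the fixed ntz() pattern L0,L1,L0,L2,L0,L1,L0,...;
+   * the slot addressed by *pl is patched per 64-block chunk with the
+   * offset returned by ocb_get_l(). */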
+ *pl = &Ls[(63 + n) % 64];
+ for (i = 0; i < 64; i += 8, n = (n + 8) % 64)
+ {
+ static const int lastL[8] = { 3, 4, 3, 5, 3, 4, 3, 0 };
+
+ Ls[(0 + n) % 64] = c->u_mode.ocb.L[0];
+ Ls[(1 + n) % 64] = c->u_mode.ocb.L[1];
+ Ls[(2 + n) % 64] = c->u_mode.ocb.L[0];
+ Ls[(3 + n) % 64] = c->u_mode.ocb.L[2];
+ Ls[(4 + n) % 64] = c->u_mode.ocb.L[0];
+ Ls[(5 + n) % 64] = c->u_mode.ocb.L[1];
+ Ls[(6 + n) % 64] = c->u_mode.ocb.L[0];
+ Ls[(7 + n) % 64] = c->u_mode.ocb.L[lastL[i / 8]];
+ }
+}
+
+static NO_INLINE void
+aes_s390x_ocb_checksum (unsigned char *checksum, const void *plainbuf_arg,
+ size_t nblks)
+{
+ const char *plainbuf = plainbuf_arg;
+ u64 tmp0[2];
+ u64 tmp1[2] = { 0, 0 };
+ u64 tmp2[2] = { 0, 0 };
+ u64 tmp3[2] = { 0, 0 };
+
+ cipher_block_cpy (tmp0, checksum, BLOCKSIZE);
+
+ if (nblks >= 4)
+ {
+ while (nblks >= 4)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ cipher_block_xor_1 (tmp0, plainbuf + 0 * BLOCKSIZE, BLOCKSIZE);
+ cipher_block_xor_1 (tmp1, plainbuf + 1 * BLOCKSIZE, BLOCKSIZE);
+ cipher_block_xor_1 (tmp2, plainbuf + 2 * BLOCKSIZE, BLOCKSIZE);
+ cipher_block_xor_1 (tmp3, plainbuf + 3 * BLOCKSIZE, BLOCKSIZE);
+
+ plainbuf += 4 * BLOCKSIZE;
+ nblks -= 4;
+ }
+
+ cipher_block_xor_1 (tmp0, tmp1, BLOCKSIZE);
+ cipher_block_xor_1 (tmp2, tmp3, BLOCKSIZE);
+ cipher_block_xor_1 (tmp0, tmp2, BLOCKSIZE);
+
+ wipememory (tmp1, sizeof(tmp1));
+ wipememory (tmp2, sizeof(tmp2));
+ wipememory (tmp3, sizeof(tmp3));
+ }
+
+ while (nblks > 0)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ cipher_block_xor_1 (tmp0, plainbuf, BLOCKSIZE);
+
+ plainbuf += BLOCKSIZE;
+ nblks--;
+ }
+
+ cipher_block_cpy (checksum, tmp0, BLOCKSIZE);
+
+ wipememory (tmp0, sizeof(tmp0));
+}
+
+static NO_INLINE size_t
+aes_s390x_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks_arg)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ size_t nblocks = nblocks_arg;
+ u128_t blocks[64];
+ u128_t offset;
+ size_t max_blocks_used = 0;
+ u64 blkn = c->u_mode.ocb.data_nblocks;
+ unsigned int function = ctx->km_func | KM_ENCRYPT;
+ const void *Ls[64];
+ const void **pl;
+
+ aes_s390x_ocb_prepare_Ls (c, blkn, Ls, &pl);
+
+ /* Checksumming could be done inline in OCB_INPUT macros, but register
+ * pressure becomes too heavy and performance would end up being worse.
+ * For decryption, checksumming is part of OCB_OUTPUT macros as
+ * output handling is less demanding and can handle the additional
+ * computation. */
+ aes_s390x_ocb_checksum (c->u_ctr.ctr, inbuf_arg, nblocks_arg);
+
+ cipher_block_cpy (&offset, &c->u_iv.iv, BLOCKSIZE);
+
+#define OCB_INPUT(n) \
+ cipher_block_xor_2dst (&blocks[n], &offset, Ls[n], BLOCKSIZE); \
+ cipher_block_xor (outbuf + (n) * BLOCKSIZE, inbuf + (n) * BLOCKSIZE, \
+ &offset, BLOCKSIZE)
+
+#define OCB_INPUT_4(n) \
+ OCB_INPUT((n) + 0); OCB_INPUT((n) + 1); OCB_INPUT((n) + 2); \
+ OCB_INPUT((n) + 3)
+
+#define OCB_INPUT_16(n) \
+ OCB_INPUT_4((n) + 0); OCB_INPUT_4((n) + 4); OCB_INPUT_4((n) + 8); \
+ OCB_INPUT_4((n) + 12);
+
+#define OCB_OUTPUT(n) \
+ cipher_block_xor_1 (outbuf + (n) * BLOCKSIZE, &blocks[n], BLOCKSIZE)
+
+#define OCB_OUTPUT_4(n) \
+ OCB_OUTPUT((n) + 0); OCB_OUTPUT((n) + 1); OCB_OUTPUT((n) + 2); \
+ OCB_OUTPUT((n) + 3)
+
+#define OCB_OUTPUT_16(n) \
+ OCB_OUTPUT_4((n) + 0); OCB_OUTPUT_4((n) + 4); OCB_OUTPUT_4((n) + 8); \
+ OCB_OUTPUT_4((n) + 12);
+
+ while (nblocks >= 64)
+ {
+ blkn += 64;
+ *pl = ocb_get_l(c, blkn - blkn % 64);
+
+ OCB_INPUT_16(0);
+ OCB_INPUT_16(16);
+ OCB_INPUT_16(32);
+ OCB_INPUT_16(48);
+
+ km_execute (function, ctx->keyschenc, outbuf, outbuf, 64 * BLOCKSIZE);
+
+ asm volatile ("xc 0(256, %[out]), 0(%[blocks])\n\t"
+ "xc 256(256, %[out]), 256(%[blocks])\n\t"
+ "xc 512(256, %[out]), 512(%[blocks])\n\t"
+ "xc 768(256, %[out]), 768(%[blocks])\n\t"
+ :
+ : [out] "a" (outbuf), [blocks] "a" (blocks)
+ : "memory");
+
+ max_blocks_used = 64;
+ inbuf += 64 * BLOCKSIZE;
+ outbuf += 64 * BLOCKSIZE;
+ nblocks -= 64;
+ }
+
+ if (nblocks)
+ {
+ unsigned int pos = 0;
+
+ max_blocks_used = max_blocks_used < nblocks ? nblocks : max_blocks_used;
+
+ blkn += nblocks;
+ *pl = ocb_get_l(c, blkn - blkn % 64);
+
+ while (nblocks >= 16)
+ {
+ OCB_INPUT_16(pos + 0);
+ pos += 16;
+ nblocks -= 16;
+ }
+ while (nblocks >= 4)
+ {
+ OCB_INPUT_4(pos + 0);
+ pos += 4;
+ nblocks -= 4;
+ }
+ if (nblocks >= 2)
+ {
+ OCB_INPUT(pos + 0);
+ OCB_INPUT(pos + 1);
+ pos += 2;
+ nblocks -= 2;
+ }
+ if (nblocks >= 1)
+ {
+ OCB_INPUT(pos + 0);
+ pos += 1;
+ nblocks -= 1;
+ }
+
+ nblocks = pos;
+ pos = 0;
+ km_execute (function, ctx->keyschenc, outbuf, outbuf,
+ nblocks * BLOCKSIZE);
+
+ while (nblocks >= 16)
+ {
+ OCB_OUTPUT_16(pos + 0);
+ pos += 16;
+ nblocks -= 16;
+ }
+ while (nblocks >= 4)
+ {
+ OCB_OUTPUT_4(pos + 0);
+ pos += 4;
+ nblocks -= 4;
+ }
+ if (nblocks >= 2)
+ {
+ OCB_OUTPUT(pos + 0);
+ OCB_OUTPUT(pos + 1);
+ pos += 2;
+ nblocks -= 2;
+ }
+ if (nblocks >= 1)
+ {
+ OCB_OUTPUT(pos + 0);
+ pos += 1;
+ nblocks -= 1;
+ }
+ }
+
+#undef OCB_INPUT
+#undef OCB_INPUT_4
+#undef OCB_INPUT_16
+#undef OCB_OUTPUT
+#undef OCB_OUTPUT_4
+#undef OCB_OUTPUT_16
+
+ c->u_mode.ocb.data_nblocks = blkn;
+ cipher_block_cpy (&c->u_iv.iv, &offset, BLOCKSIZE);
+
+ if (max_blocks_used)
+ wipememory (&blocks, max_blocks_used * BLOCKSIZE);
+
+ return 0;
+}
+
+static NO_INLINE size_t
+aes_s390x_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks_arg)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ size_t nblocks = nblocks_arg;
+ u128_t blocks[64];
+ u128_t offset;
+ size_t max_blocks_used = 0;
+ u64 blkn = c->u_mode.ocb.data_nblocks;
+ unsigned int function = ctx->km_func | KM_DECRYPT;
+ const void *Ls[64];
+ const void **pl;
+
+ aes_s390x_ocb_prepare_Ls (c, blkn, Ls, &pl);
+
+ cipher_block_cpy (&offset, &c->u_iv.iv, BLOCKSIZE);
+
+#define OCB_INPUT(n) \
+ cipher_block_xor_2dst (&blocks[n], &offset, Ls[n], BLOCKSIZE); \
+ cipher_block_xor (outbuf + (n) * BLOCKSIZE, inbuf + (n) * BLOCKSIZE, \
+ &offset, BLOCKSIZE)
+
+#define OCB_INPUT_4(n) \
+ OCB_INPUT((n) + 0); OCB_INPUT((n) + 1); OCB_INPUT((n) + 2); \
+ OCB_INPUT((n) + 3)
+
+#define OCB_INPUT_16(n) \
+ OCB_INPUT_4((n) + 0); OCB_INPUT_4((n) + 4); OCB_INPUT_4((n) + 8); \
+ OCB_INPUT_4((n) + 12);
+
+#define OCB_OUTPUT(n) \
+ cipher_block_xor_1 (&blocks[n], outbuf + (n) * BLOCKSIZE, BLOCKSIZE); \
+ cipher_block_xor_1 (c->u_ctr.ctr, &blocks[n], BLOCKSIZE); \
+ cipher_block_cpy (outbuf + (n) * BLOCKSIZE, &blocks[n], BLOCKSIZE);
+
+#define OCB_OUTPUT_4(n) \
+ OCB_OUTPUT((n) + 0); OCB_OUTPUT((n) + 1); OCB_OUTPUT((n) + 2); \
+ OCB_OUTPUT((n) + 3)
+
+#define OCB_OUTPUT_16(n) \
+ OCB_OUTPUT_4((n) + 0); OCB_OUTPUT_4((n) + 4); OCB_OUTPUT_4((n) + 8); \
+ OCB_OUTPUT_4((n) + 12);
+
+ while (nblocks >= 64)
+ {
+ blkn += 64;
+ *pl = ocb_get_l(c, blkn - blkn % 64);
+
+ OCB_INPUT_16(0);
+ OCB_INPUT_16(16);
+ OCB_INPUT_16(32);
+ OCB_INPUT_16(48);
+
+ km_execute (function, ctx->keyschenc, outbuf, outbuf, 64 * BLOCKSIZE);
+
+ asm volatile ("xc 0(256, %[out]), 0(%[blocks])\n\t"
+ "xc 256(256, %[out]), 256(%[blocks])\n\t"
+ "xc 512(256, %[out]), 512(%[blocks])\n\t"
+ "xc 768(256, %[out]), 768(%[blocks])\n\t"
+ :
+ : [out] "a" (outbuf), [blocks] "a" (blocks)
+ : "memory");
+
+ max_blocks_used = 64;
+ inbuf += 64 * BLOCKSIZE;
+ outbuf += 64 * BLOCKSIZE;
+ nblocks -= 64;
+ }
+
+ if (nblocks)
+ {
+ unsigned int pos = 0;
+
+ max_blocks_used = max_blocks_used < nblocks ? nblocks : max_blocks_used;
+
+ blkn += nblocks;
+ *pl = ocb_get_l(c, blkn - blkn % 64);
+
+ while (nblocks >= 16)
+ {
+ OCB_INPUT_16(pos + 0);
+ pos += 16;
+ nblocks -= 16;
+ }
+ while (nblocks >= 4)
+ {
+ OCB_INPUT_4(pos + 0);
+ pos += 4;
+ nblocks -= 4;
+ }
+ if (nblocks >= 2)
+ {
+ OCB_INPUT(pos + 0);
+ OCB_INPUT(pos + 1);
+ pos += 2;
+ nblocks -= 2;
+ }
+ if (nblocks >= 1)
+ {
+ OCB_INPUT(pos + 0);
+ pos += 1;
+ nblocks -= 1;
+ }
+
+ nblocks = pos;
+ pos = 0;
+ km_execute (function, ctx->keyschenc, outbuf, outbuf,
+ nblocks * BLOCKSIZE);
+
+ while (nblocks >= 16)
+ {
+ OCB_OUTPUT_16(pos + 0);
+ pos += 16;
+ nblocks -= 16;
+ }
+ while (nblocks >= 4)
+ {
+ OCB_OUTPUT_4(pos + 0);
+ pos += 4;
+ nblocks -= 4;
+ }
+ if (nblocks >= 2)
+ {
+ OCB_OUTPUT(pos + 0);
+ OCB_OUTPUT(pos + 1);
+ pos += 2;
+ nblocks -= 2;
+ }
+ if (nblocks >= 1)
+ {
+ OCB_OUTPUT(pos + 0);
+ pos += 1;
+ nblocks -= 1;
+ }
+ }
+
+#undef OCB_INPUT
+#undef OCB_INPUT_4
+#undef OCB_INPUT_16
+#undef OCB_OUTPUT
+#undef OCB_OUTPUT_4
+#undef OCB_OUTPUT_16
+
+ c->u_mode.ocb.data_nblocks = blkn;
+ cipher_block_cpy (&c->u_iv.iv, &offset, BLOCKSIZE);
+
+ if (max_blocks_used)
+ wipememory (&blocks, max_blocks_used * BLOCKSIZE);
+
+ return 0;
+}
+
+static size_t
+aes_s390x_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks_arg, int encrypt)
+{
+ if (encrypt)
+ return aes_s390x_ocb_enc (c, outbuf_arg, inbuf_arg, nblocks_arg);
+ else
+ return aes_s390x_ocb_dec (c, outbuf_arg, inbuf_arg, nblocks_arg);
+}
+
+static size_t
+aes_s390x_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks_arg)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ const unsigned char *abuf = abuf_arg;
+ u128_t blocks[64];
+ u128_t offset;
+ size_t max_blocks_used = 0;
+ u64 blkn = c->u_mode.ocb.aad_nblocks;
+ unsigned int function = ctx->km_func | KM_ENCRYPT;
+ const void *Ls[64];
+ const void **pl;
+
+ aes_s390x_ocb_prepare_Ls (c, blkn, Ls, &pl);
+
+ cipher_block_cpy (&offset, c->u_mode.ocb.aad_offset, BLOCKSIZE);
+
+#define OCB_INPUT(n) \
+ cipher_block_xor_2dst (&blocks[n], &offset, Ls[n], BLOCKSIZE); \
+ cipher_block_xor_1 (&blocks[n], abuf + (n) * BLOCKSIZE, BLOCKSIZE)
+
+#define OCB_INPUT_4(n) \
+ OCB_INPUT((n) + 0); OCB_INPUT((n) + 1); OCB_INPUT((n) + 2); \
+ OCB_INPUT((n) + 3)
+
+#define OCB_INPUT_16(n) \
+ OCB_INPUT_4((n) + 0); OCB_INPUT_4((n) + 4); OCB_INPUT_4((n) + 8); \
+ OCB_INPUT_4((n) + 12);
+
+ while (nblocks_arg >= 64)
+ {
+ blkn += 64;
+ *pl = ocb_get_l(c, blkn - blkn % 64);
+
+ OCB_INPUT_16(0);
+ OCB_INPUT_16(16);
+ OCB_INPUT_16(32);
+ OCB_INPUT_16(48);
+
+ km_execute (function, ctx->keyschenc, blocks, blocks, 64 * BLOCKSIZE);
+
+ aes_s390x_ocb_checksum (c->u_mode.ocb.aad_sum, blocks, 64);
+
+ max_blocks_used = 64;
+ abuf += 64 * BLOCKSIZE;
+ nblocks_arg -= 64;
+ }
+
+ if (nblocks_arg > 0)
+ {
+ size_t nblocks = nblocks_arg;
+ unsigned int pos = 0;
+
+ max_blocks_used = max_blocks_used < nblocks ? nblocks : max_blocks_used;
+
+ blkn += nblocks;
+ *pl = ocb_get_l(c, blkn - blkn % 64);
+
+ while (nblocks >= 16)
+ {
+ OCB_INPUT_16(pos + 0);
+ pos += 16;
+ nblocks -= 16;
+ }
+ while (nblocks >= 4)
+ {
+ OCB_INPUT_4(pos + 0);
+ pos += 4;
+ nblocks -= 4;
+ }
+ if (nblocks >= 2)
+ {
+ OCB_INPUT(pos + 0);
+ OCB_INPUT(pos + 1);
+ pos += 2;
+ nblocks -= 2;
+ }
+ if (nblocks >= 1)
+ {
+ OCB_INPUT(pos + 0);
+ pos += 1;
+ nblocks -= 1;
+ }
+
+ nblocks = pos;
+ nblocks_arg -= pos;
+ pos = 0;
+ km_execute (function, ctx->keyschenc, blocks, blocks,
+ nblocks * BLOCKSIZE);
+
+ aes_s390x_ocb_checksum (c->u_mode.ocb.aad_sum, blocks, nblocks);
+ }
+
+#undef OCB_INPUT
+#undef OCB_INPUT_4
+#undef OCB_INPUT_16
+
+ c->u_mode.ocb.aad_nblocks = blkn;
+ cipher_block_cpy (c->u_mode.ocb.aad_offset, &offset, BLOCKSIZE);
+
+ if (max_blocks_used)
+ wipememory (&blocks, max_blocks_used * BLOCKSIZE);
+
+ return 0;
+}
+
+int _gcry_aes_s390x_setup_acceleration(RIJNDAEL_context *ctx,
+ unsigned int keylen,
+ unsigned int hwfeatures,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ unsigned int func;
+ unsigned int func_xts;
+ u128_t func_mask;
+ u128_t func_xts_mask;
+
+ if (!(hwfeatures & HWF_S390X_MSA))
+ return 0;
+
+ switch (keylen)
+ {
+ default:
+ case 16:
+ func = KM_FUNCTION_AES_128;
+ func_xts = KM_FUNCTION_XTS_AES_128;
+ func_mask = km_function_to_mask(KM_FUNCTION_AES_128);
+ func_xts_mask = km_function_to_mask(KM_FUNCTION_XTS_AES_128);
+ break;
+ case 24:
+ func = KM_FUNCTION_AES_192;
+ func_xts = 0;
+ func_mask = km_function_to_mask(KM_FUNCTION_AES_192);
+ func_xts_mask = 0; /* XTS-AES192 not available. */
+ break;
+ case 32:
+ func = KM_FUNCTION_AES_256;
+ func_xts = KM_FUNCTION_XTS_AES_256;
+ func_mask = km_function_to_mask(KM_FUNCTION_AES_256);
+      func_xts_mask = km_function_to_mask(KM_FUNCTION_XTS_AES_256);
+ break;
+ }
+
+ /* Query KM for supported algorithms and check if acceleration for
+ * requested key-length is available. */
+ if (!(km_query () & func_mask))
+ return 0;
+
+ ctx->km_func = func;
+
+ /* Query KM for supported XTS algorithms. */
+ if (km_query () & func_xts_mask)
+ ctx->km_func_xts = func_xts;
+
+ /* Query KMC for supported algorithms. */
+ if (kmc_query () & func_mask)
+ ctx->kmc_func = func;
+
+ /* Query KMAC for supported algorithms. */
+ if (kmac_query () & func_mask)
+ ctx->kmac_func = func;
+
+ if (hwfeatures & HWF_S390X_MSA_4)
+ {
+ /* Query KMF for supported algorithms. */
+ if (kmf_query () & func_mask)
+ ctx->kmf_func = func;
+
+ /* Query KMO for supported algorithms. */
+ if (kmo_query () & func_mask)
+ ctx->kmo_func = func;
+ }
+
+ if (hwfeatures & HWF_S390X_MSA_8)
+ {
+ /* Query KMA for supported algorithms. */
+ if (kma_query () & func_mask)
+ ctx->kma_func = func;
+ }
+
+ /* Setup zSeries bulk encryption/decryption routines. */
+
+ if (ctx->km_func)
+ {
+ bulk_ops->ocb_crypt = aes_s390x_ocb_crypt;
+ bulk_ops->ocb_auth = aes_s390x_ocb_auth;
+
+ /* CFB128 decryption uses KM instruction, instead of KMF. */
+ bulk_ops->cfb_dec = aes_s390x_cfb128_dec;
+ }
+
+ if (ctx->km_func_xts)
+ {
+ bulk_ops->xts_crypt = aes_s390x_xts_crypt;
+ }
+
+ if (ctx->kmc_func)
+ {
+ if(ctx->kmac_func)
+ {
+ /* Either KMC or KMAC used depending on 'cbc_mac' parameter. */
+ bulk_ops->cbc_enc = aes_s390x_cbc_enc;
+ }
+
+ bulk_ops->cbc_dec = aes_s390x_cbc_dec;
+ }
+
+ if (ctx->kmf_func)
+ {
+ bulk_ops->cfb_enc = aes_s390x_cfb128_enc;
+ }
+
+ if (ctx->kmo_func)
+ {
+ bulk_ops->ofb_enc = aes_s390x_ofb_enc;
+ }
+
+ if (ctx->kma_func)
+ {
+ bulk_ops->ctr_enc = aes_s390x_ctr128_enc;
+
+ if (kimd_query () & km_function_to_mask (KMID_FUNCTION_GHASH))
+ {
+ /* KIMD based GHASH implementation is required with AES-GCM
+ * acceleration. */
+ bulk_ops->gcm_crypt = aes_s390x_gcm_crypt;
+ }
+ }
+
+ return 1;
+}
+
+void _gcry_aes_s390x_setkey(RIJNDAEL_context *ctx, const byte *key)
+{
+ unsigned int keylen = 16 + (ctx->rounds - 10) * 4;
+ memcpy (ctx->keyschenc, key, keylen);
+}
+
+void _gcry_aes_s390x_prepare_decryption(RIJNDAEL_context *ctx)
+{
+ /* Do nothing. */
+ (void)ctx;
+}
+
+#endif /* USE_S390X_CRYPTO */
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-ssse3-amd64-asm.S b/comm/third_party/libgcrypt/cipher/rijndael-ssse3-amd64-asm.S
new file mode 100644
index 0000000000..8124eb2198
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-ssse3-amd64-asm.S
@@ -0,0 +1,874 @@
+/* SSSE3 vector permutation AES for Libgcrypt
+ * Copyright (C) 2014-2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * The code is based on the public domain library libvpaes version 0.5
+ * available at http://crypto.stanford.edu/vpaes/ and which carries
+ * this notice:
+ *
+ * libvpaes: constant-time SSSE3 AES encryption and decryption.
+ * version 0.5
+ *
+ * By Mike Hamburg, Stanford University, 2009. Public domain.
+ * I wrote essentially all of this code. I did not write the test
+ * vectors; they are the NIST known answer tests. I hereby release all
+ * the code and documentation here that I wrote into the public domain.
+ *
+ * This is an implementation of AES following my paper,
+ *  "Accelerating AES with Vector Permute Instructions",
+ * CHES 2009; http://shiftleft.org/papers/vector_aes/
+ */
+
+#if defined(__x86_64__)
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+
+##
+## _gcry_aes_ssse3_enc_preload
+##
+ELF(.type _gcry_aes_ssse3_enc_preload,@function)
+.globl _gcry_aes_ssse3_enc_preload
+_gcry_aes_ssse3_enc_preload:
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+ lea .Laes_consts(%rip), %rax
+ movdqa (%rax), %xmm9 # 0F
+ movdqa .Lk_inv (%rax), %xmm10 # inv
+ movdqa .Lk_inv+16(%rax), %xmm11 # inva
+ movdqa .Lk_sb1 (%rax), %xmm13 # sb1u
+ movdqa .Lk_sb1+16(%rax), %xmm12 # sb1t
+ movdqa .Lk_sb2 (%rax), %xmm15 # sb2u
+ movdqa .Lk_sb2+16(%rax), %xmm14 # sb2t
+ EXIT_SYSV_FUNC
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_ssse3_enc_preload,.-_gcry_aes_ssse3_enc_preload)
+
+##
+## _gcry_aes_ssse3_dec_preload
+##
+ELF(.type _gcry_aes_ssse3_dec_preload,@function)
+.globl _gcry_aes_ssse3_dec_preload
+_gcry_aes_ssse3_dec_preload:
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+ lea .Laes_consts(%rip), %rax
+ movdqa (%rax), %xmm9 # 0F
+ movdqa .Lk_inv (%rax), %xmm10 # inv
+ movdqa .Lk_inv+16(%rax), %xmm11 # inva
+ movdqa .Lk_dsb9 (%rax), %xmm13 # sb9u
+ movdqa .Lk_dsb9+16(%rax), %xmm12 # sb9t
+ movdqa .Lk_dsbd (%rax), %xmm15 # sbdu
+ movdqa .Lk_dsbb (%rax), %xmm14 # sbbu
+ movdqa .Lk_dsbe (%rax), %xmm8 # sbeu
+ EXIT_SYSV_FUNC
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_ssse3_dec_preload,.-_gcry_aes_ssse3_dec_preload)
+
+##
+## Constant-time SSSE3 AES core implementation.
+##
+## By Mike Hamburg (Stanford University), 2009
+## Public domain.
+##
+
+##
+## _aes_encrypt_core
+##
+## AES-encrypt %xmm0.
+##
+## Inputs:
+## %xmm0 = input
+##  %xmm9-%xmm15 as loaded by _gcry_aes_ssse3_enc_preload
+## (%rdi) = scheduled keys
+## %rsi = nrounds
+##
+## Output in %xmm0
+## Clobbers %xmm1-%xmm4, %r9, %r11, %rax, %rcx, %rdx
+## Preserves %xmm6 - %xmm7 so you get some local vectors
+##
+##
+.align 16
+ELF(.type _gcry_aes_ssse3_encrypt_core,@function)
+.globl _gcry_aes_ssse3_encrypt_core
+_gcry_aes_ssse3_encrypt_core:
+_aes_encrypt_core:
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+ mov %rdi, %rdx
+ leaq -1(%rsi), %rax
+ lea .Laes_consts(%rip), %rcx
+ leaq .Lk_mc_backward(%rcx), %rdi
+ mov $16, %rsi
+ movdqa .Lk_ipt (%rcx), %xmm2 # iptlo
+ movdqa %xmm9, %xmm1
+ pandn %xmm0, %xmm1
+ psrld $4, %xmm1
+ pand %xmm9, %xmm0
+ pshufb %xmm0, %xmm2
+ movdqa .Lk_ipt+16(%rcx), %xmm0 # ipthi
+ pshufb %xmm1, %xmm0
+ pxor (%rdx),%xmm2
+ pxor %xmm2, %xmm0
+ add $16, %rdx
+ jmp .Laes_entry
+
+.align 8
+.Laes_loop:
+ # middle of middle round
+ movdqa %xmm13, %xmm4 # 4 : sb1u
+ pshufb %xmm2, %xmm4 # 4 = sb1u
+ pxor (%rdx), %xmm4 # 4 = sb1u + k
+ movdqa %xmm12, %xmm0 # 0 : sb1t
+ pshufb %xmm3, %xmm0 # 0 = sb1t
+ pxor %xmm4, %xmm0 # 0 = A
+ movdqa %xmm15, %xmm4 # 4 : sb2u
+ pshufb %xmm2, %xmm4 # 4 = sb2u
+ movdqa .Lk_mc_forward-.Lk_mc_backward(%rsi,%rdi), %xmm1
+ movdqa %xmm14, %xmm2 # 2 : sb2t
+ pshufb %xmm3, %xmm2 # 2 = sb2t
+ pxor %xmm4, %xmm2 # 2 = 2A
+ movdqa %xmm0, %xmm3 # 3 = A
+ pshufb %xmm1, %xmm0 # 0 = B
+ pxor %xmm2, %xmm0 # 0 = 2A+B
+ pshufb (%rsi,%rdi), %xmm3 # 3 = D
+ lea 16(%esi),%esi # next mc
+ pxor %xmm0, %xmm3 # 3 = 2A+B+D
+ lea 16(%rdx),%rdx # next key
+ pshufb %xmm1, %xmm0 # 0 = 2B+C
+ pxor %xmm3, %xmm0 # 0 = 2A+3B+C+D
+ and $48, %rsi # ... mod 4
+ dec %rax # nr--
+
+.Laes_entry:
+ # top of round
+ movdqa %xmm9, %xmm1 # 1 : i
+ pandn %xmm0, %xmm1 # 1 = i<<4
+ psrld $4, %xmm1 # 1 = i
+ pand %xmm9, %xmm0 # 0 = k
+ movdqa %xmm11, %xmm2 # 2 : a/k
+ pshufb %xmm0, %xmm2 # 2 = a/k
+ pxor %xmm1, %xmm0 # 0 = j
+ movdqa %xmm10, %xmm3 # 3 : 1/i
+ pshufb %xmm1, %xmm3 # 3 = 1/i
+ pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k
+ movdqa %xmm10, %xmm4 # 4 : 1/j
+ pshufb %xmm0, %xmm4 # 4 = 1/j
+ pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k
+ movdqa %xmm10, %xmm2 # 2 : 1/iak
+ pshufb %xmm3, %xmm2 # 2 = 1/iak
+ pxor %xmm0, %xmm2 # 2 = io
+ movdqa %xmm10, %xmm3 # 3 : 1/jak
+ pshufb %xmm4, %xmm3 # 3 = 1/jak
+ pxor %xmm1, %xmm3 # 3 = jo
+ jnz .Laes_loop
+
+ # middle of last round
+ movdqa .Lk_sbo(%rcx), %xmm4 # 3 : sbou
+ pshufb %xmm2, %xmm4 # 4 = sbou
+ pxor (%rdx), %xmm4 # 4 = sb1u + k
+ movdqa .Lk_sbo+16(%rcx), %xmm0 # 0 : sbot
+ pshufb %xmm3, %xmm0 # 0 = sb1t
+ pxor %xmm4, %xmm0 # 0 = A
+ pshufb .Lk_sr(%rsi,%rcx), %xmm0
+ EXIT_SYSV_FUNC
+ ret
+ CFI_ENDPROC();
+ELF(.size _aes_encrypt_core,.-_aes_encrypt_core)
+
+##
+## Decryption core
+##
+## Same API as encryption core.
+##
+.align 16
+.globl _gcry_aes_ssse3_decrypt_core
+ELF(.type _gcry_aes_ssse3_decrypt_core,@function)
+_gcry_aes_ssse3_decrypt_core:
+_aes_decrypt_core:
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+ mov %rdi, %rdx
+ lea .Laes_consts(%rip), %rcx
+ subl $1, %esi
+ movl %esi, %eax
+ shll $4, %esi
+ xorl $48, %esi
+ andl $48, %esi
+ movdqa .Lk_dipt (%rcx), %xmm2 # iptlo
+ movdqa %xmm9, %xmm1
+ pandn %xmm0, %xmm1
+ psrld $4, %xmm1
+ pand %xmm9, %xmm0
+ pshufb %xmm0, %xmm2
+ movdqa .Lk_dipt+16(%rcx), %xmm0 # ipthi
+ pshufb %xmm1, %xmm0
+ pxor (%rdx), %xmm2
+ pxor %xmm2, %xmm0
+ movdqa .Lk_mc_forward+48(%rcx), %xmm5
+ lea 16(%rdx), %rdx
+ neg %rax
+ jmp .Laes_dec_entry
+
+.align 16
+.Laes_dec_loop:
+##
+## Inverse mix columns
+##
+ movdqa %xmm13, %xmm4 # 4 : sb9u
+ pshufb %xmm2, %xmm4 # 4 = sb9u
+ pxor (%rdx), %xmm4
+ movdqa %xmm12, %xmm0 # 0 : sb9t
+ pshufb %xmm3, %xmm0 # 0 = sb9t
+ movdqa .Lk_dsbd+16(%rcx),%xmm1 # 1 : sbdt
+ pxor %xmm4, %xmm0 # 0 = ch
+ lea 16(%rdx), %rdx # next round key
+
+ pshufb %xmm5, %xmm0 # MC ch
+ movdqa %xmm15, %xmm4 # 4 : sbdu
+ pshufb %xmm2, %xmm4 # 4 = sbdu
+ pxor %xmm0, %xmm4 # 4 = ch
+ pshufb %xmm3, %xmm1 # 1 = sbdt
+ pxor %xmm4, %xmm1 # 1 = ch
+
+ pshufb %xmm5, %xmm1 # MC ch
+ movdqa %xmm14, %xmm4 # 4 : sbbu
+ pshufb %xmm2, %xmm4 # 4 = sbbu
+ inc %rax # nr--
+ pxor %xmm1, %xmm4 # 4 = ch
+ movdqa .Lk_dsbb+16(%rcx),%xmm0 # 0 : sbbt
+ pshufb %xmm3, %xmm0 # 0 = sbbt
+ pxor %xmm4, %xmm0 # 0 = ch
+
+ pshufb %xmm5, %xmm0 # MC ch
+ movdqa %xmm8, %xmm4 # 4 : sbeu
+ pshufb %xmm2, %xmm4 # 4 = sbeu
+ pshufd $0x93, %xmm5, %xmm5
+ pxor %xmm0, %xmm4 # 4 = ch
+ movdqa .Lk_dsbe+16(%rcx),%xmm0 # 0 : sbet
+ pshufb %xmm3, %xmm0 # 0 = sbet
+ pxor %xmm4, %xmm0 # 0 = ch
+
+.Laes_dec_entry:
+ # top of round
+ movdqa %xmm9, %xmm1 # 1 : i
+ pandn %xmm0, %xmm1 # 1 = i<<4
+ psrld $4, %xmm1 # 1 = i
+ pand %xmm9, %xmm0 # 0 = k
+ movdqa %xmm11, %xmm2 # 2 : a/k
+ pshufb %xmm0, %xmm2 # 2 = a/k
+ pxor %xmm1, %xmm0 # 0 = j
+ movdqa %xmm10, %xmm3 # 3 : 1/i
+ pshufb %xmm1, %xmm3 # 3 = 1/i
+ pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k
+ movdqa %xmm10, %xmm4 # 4 : 1/j
+ pshufb %xmm0, %xmm4 # 4 = 1/j
+ pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k
+ movdqa %xmm10, %xmm2 # 2 : 1/iak
+ pshufb %xmm3, %xmm2 # 2 = 1/iak
+ pxor %xmm0, %xmm2 # 2 = io
+ movdqa %xmm10, %xmm3 # 3 : 1/jak
+ pshufb %xmm4, %xmm3 # 3 = 1/jak
+ pxor %xmm1, %xmm3 # 3 = jo
+ jnz .Laes_dec_loop
+
+ # middle of last round
+ movdqa .Lk_dsbo(%rcx), %xmm4 # 3 : sbou
+ pshufb %xmm2, %xmm4 # 4 = sbou
+ pxor (%rdx), %xmm4 # 4 = sb1u + k
+ movdqa .Lk_dsbo+16(%rcx), %xmm0 # 0 : sbot
+ pshufb %xmm3, %xmm0 # 0 = sb1t
+ pxor %xmm4, %xmm0 # 0 = A
+ pshufb .Lk_sr(%rsi,%rcx), %xmm0
+ EXIT_SYSV_FUNC
+ ret
+ CFI_ENDPROC();
+ELF(.size _aes_decrypt_core,.-_aes_decrypt_core)
+
+########################################################
+## ##
+## AES key schedule ##
+## ##
+########################################################
+
+.align 16
+.globl _gcry_aes_ssse3_schedule_core
+ELF(.type _gcry_aes_ssse3_schedule_core,@function)
+_gcry_aes_ssse3_schedule_core:
+_aes_schedule_core:
+ # rdi = key
+ # rsi = size in bits
+ # rdx = buffer
+ # rcx = direction. 0=encrypt, 1=decrypt
+ # r8 = rotoffs
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_5
+
+ # load the tables
+ lea .Laes_consts(%rip), %r10
+ movdqa (%r10), %xmm9 # 0F
+ movdqa .Lk_inv (%r10), %xmm10 # inv
+ movdqa .Lk_inv+16(%r10), %xmm11 # inva
+ movdqa .Lk_sb1 (%r10), %xmm13 # sb1u
+ movdqa .Lk_sb1+16(%r10), %xmm12 # sb1t
+ movdqa .Lk_sb2 (%r10), %xmm15 # sb2u
+ movdqa .Lk_sb2+16(%r10), %xmm14 # sb2t
+
+ movdqa .Lk_rcon(%r10), %xmm8 # load rcon
+ movdqu (%rdi), %xmm0 # load key (unaligned)
+
+ # input transform
+ movdqu %xmm0, %xmm3
+ lea .Lk_ipt(%r10), %r11
+ call .Laes_schedule_transform
+ movdqu %xmm0, %xmm7
+
+ test %rcx, %rcx
+ jnz .Laes_schedule_am_decrypting
+
+ # encrypting, output zeroth round key after transform
+ movdqa %xmm0, (%rdx)
+ jmp .Laes_schedule_go
+
+.Laes_schedule_am_decrypting:
+ # decrypting, output zeroth round key after shiftrows
+ pshufb .Lk_sr(%r8,%r10),%xmm3
+ movdqa %xmm3, (%rdx)
+ xor $48, %r8
+
+.Laes_schedule_go:
+ cmp $192, %rsi
+ je .Laes_schedule_192
+ cmp $256, %rsi
+ je .Laes_schedule_256
+	# 128: fall through
+
+##
+## .Laes_schedule_128
+##
+## 128-bit specific part of key schedule.
+##
+## This schedule is really simple, because all its parts
+## are accomplished by the subroutines.
+##
+.Laes_schedule_128:
+ mov $10, %rsi
+
+.Laes_schedule_128_L:
+ call .Laes_schedule_round
+ dec %rsi
+ jz .Laes_schedule_mangle_last
+ call .Laes_schedule_mangle # write output
+ jmp .Laes_schedule_128_L
+
+##
+## .Laes_schedule_192
+##
+## 192-bit specific part of key schedule.
+##
+## The main body of this schedule is the same as the 128-bit
+## schedule, but with more smearing. The long, high side is
+## stored in %xmm7 as before, and the short, low side is in
+## the high bits of %xmm6.
+##
+## This schedule is somewhat nastier, however, because each
+## round produces 192 bits of key material, or 1.5 round keys.
+## Therefore, on each cycle we do 2 rounds and produce 3 round
+## keys.
+##
+.Laes_schedule_192:
+ movdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned)
+ call .Laes_schedule_transform # input transform
+ pshufd $0x0E, %xmm0, %xmm6
+ pslldq $8, %xmm6 # clobber low side with zeros
+ mov $4, %rsi
+
+.Laes_schedule_192_L:
+ call .Laes_schedule_round
+ palignr $8,%xmm6,%xmm0
+ call .Laes_schedule_mangle # save key n
+ call .Laes_schedule_192_smear
+ call .Laes_schedule_mangle # save key n+1
+ call .Laes_schedule_round
+ dec %rsi
+ jz .Laes_schedule_mangle_last
+ call .Laes_schedule_mangle # save key n+2
+ call .Laes_schedule_192_smear
+ jmp .Laes_schedule_192_L
+
+##
+## .Laes_schedule_192_smear
+##
+## Smear the short, low side in the 192-bit key schedule.
+##
+## Inputs:
+## %xmm7: high side, b a x y
+## %xmm6: low side, d c 0 0
+## %xmm13: 0
+##
+## Outputs:
+## %xmm6: b+c+d b+c 0 0
+## %xmm0: b+c+d b+c b a
+##
+.Laes_schedule_192_smear:
+ pshufd $0x80, %xmm6, %xmm0 # d c 0 0 -> c 0 0 0
+ pxor %xmm0, %xmm6 # -> c+d c 0 0
+ pshufd $0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a
+ pxor %xmm6, %xmm0 # -> b+c+d b+c b a
+ pshufd $0x0E, %xmm0, %xmm6
+ pslldq $8, %xmm6 # clobber low side with zeros
+ ret
+
+##
+## .Laes_schedule_256
+##
+## 256-bit specific part of key schedule.
+##
+## The structure here is very similar to the 128-bit
+## schedule, but with an additional 'low side' in
+## %xmm6. The low side's rounds are the same as the
+## high side's, except no rcon and no rotation.
+##
+.Laes_schedule_256:
+ movdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
+ call .Laes_schedule_transform # input transform
+ mov $7, %rsi
+
+.Laes_schedule_256_L:
+ call .Laes_schedule_mangle # output low result
+ movdqa %xmm0, %xmm6 # save cur_lo in xmm6
+
+ # high round
+ call .Laes_schedule_round
+ dec %rsi
+ jz .Laes_schedule_mangle_last
+ call .Laes_schedule_mangle
+
+ # low round. swap xmm7 and xmm6
+ pshufd $0xFF, %xmm0, %xmm0
+ movdqa %xmm7, %xmm5
+ movdqa %xmm6, %xmm7
+ call .Laes_schedule_low_round
+ movdqa %xmm5, %xmm7
+
+ jmp .Laes_schedule_256_L
+
+##
+## .Laes_schedule_round
+##
+## Runs one main round of the key schedule on %xmm0, %xmm7
+##
+## Specifically, runs subbytes on the high dword of %xmm0
+## then rotates it by one byte and xors into the low dword of
+## %xmm7.
+##
+## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
+## next rcon.
+##
+## Smears the dwords of %xmm7 by xoring the low into the
+## second low, result into third, result into highest.
+##
+## Returns results in %xmm7 = %xmm0.
+## Clobbers %xmm1-%xmm4, %r11.
+##
+.Laes_schedule_round:
+ # extract rcon from xmm8
+ pxor %xmm1, %xmm1
+ palignr $15, %xmm8, %xmm1
+ palignr $15, %xmm8, %xmm8
+ pxor %xmm1, %xmm7
+
+ # rotate
+ pshufd $0xFF, %xmm0, %xmm0
+ palignr $1, %xmm0, %xmm0
+
+ # fall through...
+
+ # low round: same as high round, but no rotation and no rcon.
+.Laes_schedule_low_round:
+ # smear xmm7
+ movdqa %xmm7, %xmm1
+ pslldq $4, %xmm7
+ pxor %xmm1, %xmm7
+ movdqa %xmm7, %xmm1
+ pslldq $8, %xmm7
+ pxor %xmm1, %xmm7
+ pxor .Lk_s63(%r10), %xmm7
+
+ # subbytes
+ movdqa %xmm9, %xmm1
+ pandn %xmm0, %xmm1
+ psrld $4, %xmm1 # 1 = i
+ pand %xmm9, %xmm0 # 0 = k
+ movdqa %xmm11, %xmm2 # 2 : a/k
+ pshufb %xmm0, %xmm2 # 2 = a/k
+ pxor %xmm1, %xmm0 # 0 = j
+ movdqa %xmm10, %xmm3 # 3 : 1/i
+ pshufb %xmm1, %xmm3 # 3 = 1/i
+ pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k
+ movdqa %xmm10, %xmm4 # 4 : 1/j
+ pshufb %xmm0, %xmm4 # 4 = 1/j
+ pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k
+ movdqa %xmm10, %xmm2 # 2 : 1/iak
+ pshufb %xmm3, %xmm2 # 2 = 1/iak
+ pxor %xmm0, %xmm2 # 2 = io
+ movdqa %xmm10, %xmm3 # 3 : 1/jak
+ pshufb %xmm4, %xmm3 # 3 = 1/jak
+ pxor %xmm1, %xmm3 # 3 = jo
+ movdqa .Lk_sb1(%r10), %xmm4 # 4 : sbou
+ pshufb %xmm2, %xmm4 # 4 = sbou
+ movdqa .Lk_sb1+16(%r10), %xmm0 # 0 : sbot
+ pshufb %xmm3, %xmm0 # 0 = sb1t
+ pxor %xmm4, %xmm0 # 0 = sbox output
+
+ # add in smeared stuff
+ pxor %xmm7, %xmm0
+ movdqa %xmm0, %xmm7
+ ret
+
+##
+## .Laes_schedule_transform
+##
+## Linear-transform %xmm0 according to tables at (%r11)
+##
+## Requires that %xmm9 = 0x0F0F... as in preheat
+## Output in %xmm0
+## Clobbers %xmm1, %xmm2
+##
+.Laes_schedule_transform:
+ movdqa %xmm9, %xmm1
+ pandn %xmm0, %xmm1
+ psrld $4, %xmm1
+ pand %xmm9, %xmm0
+ movdqa (%r11), %xmm2 # lo
+ pshufb %xmm0, %xmm2
+ movdqa 16(%r11), %xmm0 # hi
+ pshufb %xmm1, %xmm0
+ pxor %xmm2, %xmm0
+ ret
+
+##
+## .Laes_schedule_mangle
+##
+## Mangle xmm0 from (basis-transformed) standard version
+## to our version.
+##
+## On encrypt,
+## xor with 0x63
+## multiply by circulant 0,1,1,1
+## apply shiftrows transform
+##
+## On decrypt,
+## xor with 0x63
+## multiply by 'inverse mixcolumns' circulant E,B,D,9
+## deskew
+## apply shiftrows transform
+##
+##
+## Writes out to (%rdx), and increments or decrements it
+## Keeps track of round number mod 4 in %r8
+## Preserves xmm0
+## Clobbers xmm1-xmm5
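+##
+## (The 'multiply by 0,1,1,1' on the encrypt path is realized by xoring
+## together three successive rotations of each column, i.e. the repeated
+## pshufb with mc_forward below.)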
+##
+.Laes_schedule_mangle:
+ movdqa %xmm0, %xmm4 # save xmm0 for later
+ movdqa .Lk_mc_forward(%r10),%xmm5
+ test %rcx, %rcx
+ jnz .Laes_schedule_mangle_dec
+
+ # encrypting
+ add $16, %rdx
+ pxor .Lk_s63(%r10),%xmm4
+ pshufb %xmm5, %xmm4
+ movdqa %xmm4, %xmm3
+ pshufb %xmm5, %xmm4
+ pxor %xmm4, %xmm3
+ pshufb %xmm5, %xmm4
+ pxor %xmm4, %xmm3
+
+ jmp .Laes_schedule_mangle_both
+
+.Laes_schedule_mangle_dec:
+ lea .Lk_dks_1(%r10), %r11 # first table: *9
+ call .Laes_schedule_transform
+ movdqa %xmm0, %xmm3
+ pshufb %xmm5, %xmm3
+
+ add $32, %r11 # next table: *B
+ call .Laes_schedule_transform
+ pxor %xmm0, %xmm3
+ pshufb %xmm5, %xmm3
+
+ add $32, %r11 # next table: *D
+ call .Laes_schedule_transform
+ pxor %xmm0, %xmm3
+ pshufb %xmm5, %xmm3
+
+ add $32, %r11 # next table: *E
+ call .Laes_schedule_transform
+ pxor %xmm0, %xmm3
+ pshufb %xmm5, %xmm3
+
+ movdqa %xmm4, %xmm0 # restore %xmm0
+ add $-16, %rdx
+
+.Laes_schedule_mangle_both:
+ pshufb .Lk_sr(%r8,%r10),%xmm3
+ add $-16, %r8
+ and $48, %r8
+ movdqa %xmm3, (%rdx)
+ ret
+
+##
+## .Laes_schedule_mangle_last
+##
+## Mangler for last round of key schedule
+## Mangles %xmm0
+## when encrypting, outputs out(%xmm0) ^ 63
+## when decrypting, outputs unskew(%xmm0)
+##
+## Always called right before return... jumps to cleanup and exits
+##
+.Laes_schedule_mangle_last:
+ # schedule last round key from xmm0
+ lea .Lk_deskew(%r10),%r11 # prepare to deskew
+ test %rcx, %rcx
+ jnz .Laes_schedule_mangle_last_dec
+
+ # encrypting
+ pshufb .Lk_sr(%r8,%r10),%xmm0 # output permute
+ lea .Lk_opt(%r10), %r11 # prepare to output transform
+ add $32, %rdx
+
+.Laes_schedule_mangle_last_dec:
+ add $-16, %rdx
+ pxor .Lk_s63(%r10), %xmm0
+ call .Laes_schedule_transform # output transform
+ movdqa %xmm0, (%rdx) # save last key
+
+ #_aes_cleanup
+ pxor %xmm0, %xmm0
+ pxor %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ pxor %xmm3, %xmm3
+ pxor %xmm4, %xmm4
+ pxor %xmm5, %xmm5
+ pxor %xmm6, %xmm6
+ pxor %xmm7, %xmm7
+ pxor %xmm8, %xmm8
+ EXIT_SYSV_FUNC
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_aes_ssse3_schedule_core,.-_gcry_aes_ssse3_schedule_core)
+
+########################################################
+## ##
+## Constants ##
+## ##
+########################################################
+
+.align 16
+ELF(.type _aes_consts,@object)
+.Laes_consts:
+_aes_consts:
+ # s0F
+ .Lk_s0F = .-.Laes_consts
+ .quad 0x0F0F0F0F0F0F0F0F
+ .quad 0x0F0F0F0F0F0F0F0F
+
+ # input transform (lo, hi)
+ .Lk_ipt = .-.Laes_consts
+ .quad 0xC2B2E8985A2A7000
+ .quad 0xCABAE09052227808
+ .quad 0x4C01307D317C4D00
+ .quad 0xCD80B1FCB0FDCC81
+
+ # inv, inva
+ .Lk_inv = .-.Laes_consts
+ .quad 0x0E05060F0D080180
+ .quad 0x040703090A0B0C02
+ .quad 0x01040A060F0B0780
+ .quad 0x030D0E0C02050809
+
+ # sb1u, sb1t
+ .Lk_sb1 = .-.Laes_consts
+ .quad 0xB19BE18FCB503E00
+ .quad 0xA5DF7A6E142AF544
+ .quad 0x3618D415FAE22300
+ .quad 0x3BF7CCC10D2ED9EF
+
+
+ # sb2u, sb2t
+ .Lk_sb2 = .-.Laes_consts
+ .quad 0xE27A93C60B712400
+ .quad 0x5EB7E955BC982FCD
+ .quad 0x69EB88400AE12900
+ .quad 0xC2A163C8AB82234A
+
+ # sbou, sbot
+ .Lk_sbo = .-.Laes_consts
+ .quad 0xD0D26D176FBDC700
+ .quad 0x15AABF7AC502A878
+ .quad 0xCFE474A55FBB6A00
+ .quad 0x8E1E90D1412B35FA
+
+ # mc_forward
+ .Lk_mc_forward = .-.Laes_consts
+ .quad 0x0407060500030201
+ .quad 0x0C0F0E0D080B0A09
+ .quad 0x080B0A0904070605
+ .quad 0x000302010C0F0E0D
+ .quad 0x0C0F0E0D080B0A09
+ .quad 0x0407060500030201
+ .quad 0x000302010C0F0E0D
+ .quad 0x080B0A0904070605
+
+ # mc_backward
+ .Lk_mc_backward = .-.Laes_consts
+ .quad 0x0605040702010003
+ .quad 0x0E0D0C0F0A09080B
+ .quad 0x020100030E0D0C0F
+ .quad 0x0A09080B06050407
+ .quad 0x0E0D0C0F0A09080B
+ .quad 0x0605040702010003
+ .quad 0x0A09080B06050407
+ .quad 0x020100030E0D0C0F
+
+ # sr
+ .Lk_sr = .-.Laes_consts
+ .quad 0x0706050403020100
+ .quad 0x0F0E0D0C0B0A0908
+ .quad 0x030E09040F0A0500
+ .quad 0x0B06010C07020D08
+ .quad 0x0F060D040B020900
+ .quad 0x070E050C030A0108
+ .quad 0x0B0E0104070A0D00
+ .quad 0x0306090C0F020508
+
+ # rcon
+ .Lk_rcon = .-.Laes_consts
+ .quad 0x1F8391B9AF9DEEB6
+ .quad 0x702A98084D7C7D81
+
+ # s63: all equal to 0x63 transformed
+ .Lk_s63 = .-.Laes_consts
+ .quad 0x5B5B5B5B5B5B5B5B
+ .quad 0x5B5B5B5B5B5B5B5B
+
+ # output transform
+ .Lk_opt = .-.Laes_consts
+ .quad 0xFF9F4929D6B66000
+ .quad 0xF7974121DEBE6808
+ .quad 0x01EDBD5150BCEC00
+ .quad 0xE10D5DB1B05C0CE0
+
+ # deskew tables: inverts the sbox's 'skew'
+ .Lk_deskew = .-.Laes_consts
+ .quad 0x07E4A34047A4E300
+ .quad 0x1DFEB95A5DBEF91A
+ .quad 0x5F36B5DC83EA6900
+ .quad 0x2841C2ABF49D1E77
+
+##
+## Decryption stuff
+## Key schedule constants
+##
+ # decryption key schedule: x -> invskew x*9
+ .Lk_dks_1 = .-.Laes_consts
+ .quad 0xB6116FC87ED9A700
+ .quad 0x4AED933482255BFC
+ .quad 0x4576516227143300
+ .quad 0x8BB89FACE9DAFDCE
+
+ # decryption key schedule: invskew x*9 -> invskew x*D
+ .Lk_dks_2 = .-.Laes_consts
+ .quad 0x27438FEBCCA86400
+ .quad 0x4622EE8AADC90561
+ .quad 0x815C13CE4F92DD00
+ .quad 0x73AEE13CBD602FF2
+
+ # decryption key schedule: invskew x*D -> invskew x*B
+ .Lk_dks_3 = .-.Laes_consts
+ .quad 0x03C4C50201C6C700
+ .quad 0xF83F3EF9FA3D3CFB
+ .quad 0xEE1921D638CFF700
+ .quad 0xA5526A9D7384BC4B
+
+ # decryption key schedule: invskew x*B -> invskew x*E + 0x63
+ .Lk_dks_4 = .-.Laes_consts
+ .quad 0xE3C390B053732000
+ .quad 0xA080D3F310306343
+ .quad 0xA0CA214B036982E8
+ .quad 0x2F45AEC48CE60D67
+
+##
+## Decryption stuff
+## Round function constants
+##
+ # decryption input transform
+ .Lk_dipt = .-.Laes_consts
+ .quad 0x0F505B040B545F00
+ .quad 0x154A411E114E451A
+ .quad 0x86E383E660056500
+ .quad 0x12771772F491F194
+
+ # decryption sbox output *9*u, *9*t
+ .Lk_dsb9 = .-.Laes_consts
+ .quad 0x851C03539A86D600
+ .quad 0xCAD51F504F994CC9
+ .quad 0xC03B1789ECD74900
+ .quad 0x725E2C9EB2FBA565
+
+ # decryption sbox output *D*u, *D*t
+ .Lk_dsbd = .-.Laes_consts
+ .quad 0x7D57CCDFE6B1A200
+ .quad 0xF56E9B13882A4439
+ .quad 0x3CE2FAF724C6CB00
+ .quad 0x2931180D15DEEFD3
+
+ # decryption sbox output *B*u, *B*t
+ .Lk_dsbb = .-.Laes_consts
+ .quad 0xD022649296B44200
+ .quad 0x602646F6B0F2D404
+ .quad 0xC19498A6CD596700
+ .quad 0xF3FF0C3E3255AA6B
+
+ # decryption sbox output *E*u, *E*t
+ .Lk_dsbe = .-.Laes_consts
+ .quad 0x46F2929626D4D000
+ .quad 0x2242600464B4F6B0
+ .quad 0x0C55A6CDFFAAC100
+ .quad 0x9467F36B98593E32
+
+ # decryption sbox final output
+ .Lk_dsbo = .-.Laes_consts
+ .quad 0x1387EA537EF94000
+ .quad 0xC7AA6DB9D4943E2D
+ .quad 0x12D7560F93441D00
+ .quad 0xCA4B8159D8C58E9C
+ELF(.size _aes_consts,.-_aes_consts)
+
+#endif
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-ssse3-amd64.c b/comm/third_party/libgcrypt/cipher/rijndael-ssse3-amd64.c
new file mode 100644
index 0000000000..b07238531c
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-ssse3-amd64.c
@@ -0,0 +1,743 @@
+/* SSSE3 vector permutation AES for Libgcrypt
+ * Copyright (C) 2014-2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * The code is based on the public domain library libvpaes version 0.5
+ * available at http://crypto.stanford.edu/vpaes/ and which carries
+ * this notice:
+ *
+ * libvpaes: constant-time SSSE3 AES encryption and decryption.
+ * version 0.5
+ *
+ * By Mike Hamburg, Stanford University, 2009. Public domain.
+ * I wrote essentially all of this code. I did not write the test
+ * vectors; they are the NIST known answer tests. I hereby release all
+ * the code and documentation here that I wrote into the public domain.
+ *
+ * This is an implementation of AES following my paper,
+ * "Accelerating AES with Vector Permute Instructions"
+ * CHES 2009; http://shiftleft.org/papers/vector_aes/
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for memcmp() */
+
+#include "types.h" /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "rijndael-internal.h"
+#include "./cipher-internal.h"
+
+
+#ifdef USE_SSSE3
+
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+# pragma GCC target("no-sse")
+#endif
+#if __clang__
+# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function)
+#endif
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+
+
+/* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l
+ because of 'pragma target'. */
+static ASM_FUNC_ATTR_INLINE const unsigned char *
+aes_ocb_get_l (gcry_cipher_hd_t c, u64 n)
+{
+ unsigned long ntz;
+
+ /* Assumes that N != 0. */
+ asm ("rep;bsfl %k[low], %k[ntz]\n\t"
+ : [ntz] "=r" (ntz)
+ : [low] "r" ((unsigned long)n)
+ : "cc");
+
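+  /* bsf computed the number of trailing zero bits of n, so this returns
+     L_{ntz(i)} as used by OCB.  */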
+ return c->u_mode.ocb.L[ntz];
+}
+
+
+/* Assembly functions in rijndael-ssse3-amd64-asm.S.  Note that these
+   have a custom calling convention (additional XMM parameters).  */
+extern void _gcry_aes_ssse3_enc_preload(void);
+extern void _gcry_aes_ssse3_dec_preload(void);
+extern void _gcry_aes_ssse3_schedule_core(const void *key, u64 keybits,
+ void *buffer, u64 decrypt,
+ u64 rotoffs);
+extern void _gcry_aes_ssse3_encrypt_core(const void *key, u64 nrounds);
+extern void _gcry_aes_ssse3_decrypt_core(const void *key, u64 nrounds);
+
+
+
+/* Two macros to be called before and after the use of SSSE3
+   instructions.  There should be no external function calls between
+   the use of these macros.  Their purpose is to make sure that the
+   SSE registers are cleared and won't reveal any information about
+   the key or the data.  */
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define SSSE3_STATE_SIZE (16 * 10)
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define vpaes_ssse3_prepare() \
+ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" \
+ "movdqu %%xmm7, 1*16(%0)\n\t" \
+ "movdqu %%xmm8, 2*16(%0)\n\t" \
+ "movdqu %%xmm9, 3*16(%0)\n\t" \
+ "movdqu %%xmm10, 4*16(%0)\n\t" \
+ "movdqu %%xmm11, 5*16(%0)\n\t" \
+ "movdqu %%xmm12, 6*16(%0)\n\t" \
+ "movdqu %%xmm13, 7*16(%0)\n\t" \
+ "movdqu %%xmm14, 8*16(%0)\n\t" \
+ "movdqu %%xmm15, 9*16(%0)\n\t" \
+ : \
+ : "r" (ssse3_state) \
+ : "memory" )
+# define vpaes_ssse3_cleanup() \
+ asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \
+ "pxor %%xmm1, %%xmm1 \n\t" \
+ "pxor %%xmm2, %%xmm2 \n\t" \
+ "pxor %%xmm3, %%xmm3 \n\t" \
+ "pxor %%xmm4, %%xmm4 \n\t" \
+ "pxor %%xmm5, %%xmm5 \n\t" \
+ "movdqu 0*16(%0), %%xmm6 \n\t" \
+ "movdqu 1*16(%0), %%xmm7 \n\t" \
+ "movdqu 2*16(%0), %%xmm8 \n\t" \
+ "movdqu 3*16(%0), %%xmm9 \n\t" \
+ "movdqu 4*16(%0), %%xmm10 \n\t" \
+ "movdqu 5*16(%0), %%xmm11 \n\t" \
+ "movdqu 6*16(%0), %%xmm12 \n\t" \
+ "movdqu 7*16(%0), %%xmm13 \n\t" \
+ "movdqu 8*16(%0), %%xmm14 \n\t" \
+ "movdqu 9*16(%0), %%xmm15 \n\t" \
+ : \
+ : "r" (ssse3_state) \
+ : "memory" )
+#else
+# define SSSE3_STATE_SIZE 1
+# define vpaes_ssse3_prepare() (void)ssse3_state
+# define vpaes_ssse3_cleanup() \
+ asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \
+ "pxor %%xmm1, %%xmm1 \n\t" \
+ "pxor %%xmm2, %%xmm2 \n\t" \
+ "pxor %%xmm3, %%xmm3 \n\t" \
+ "pxor %%xmm4, %%xmm4 \n\t" \
+ "pxor %%xmm5, %%xmm5 \n\t" \
+ "pxor %%xmm6, %%xmm6 \n\t" \
+ "pxor %%xmm7, %%xmm7 \n\t" \
+ "pxor %%xmm8, %%xmm8 \n\t" \
+ ::: "memory" )
+#endif
+
+#define vpaes_ssse3_prepare_enc() \
+ vpaes_ssse3_prepare(); \
+ _gcry_aes_ssse3_enc_preload();
+
+#define vpaes_ssse3_prepare_dec() \
+ vpaes_ssse3_prepare(); \
+ _gcry_aes_ssse3_dec_preload();
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key)
+{
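+  /* ctx->rounds is 10, 12 or 14, so this recovers 128, 192 or 256 key bits.  */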
+ unsigned int keybits = (ctx->rounds - 10) * 32 + 128;
+ byte ssse3_state[SSSE3_STATE_SIZE];
+
+ vpaes_ssse3_prepare();
+
+ _gcry_aes_ssse3_schedule_core(key, keybits, &ctx->keyschenc32[0][0], 0, 48);
+
+ /* Save key for setting up decryption. */
+ if (keybits > 192)
+ asm volatile ("movdqu (%[src]), %%xmm0\n\t"
+ "movdqu 16(%[src]), %%xmm1\n\t"
+ "movdqu %%xmm0, (%[dst])\n\t"
+ "movdqu %%xmm1, 16(%[dst])\n\t"
+ : /* No output */
+ : [dst] "r" (&ctx->keyschdec32[0][0]), [src] "r" (key)
+ : "memory" );
+ else if (keybits == 192)
+ asm volatile ("movdqu (%[src]), %%xmm0\n\t"
+ "movq 16(%[src]), %%xmm1\n\t"
+ "movdqu %%xmm0, (%[dst])\n\t"
+ "movq %%xmm1, 16(%[dst])\n\t"
+ : /* No output */
+ : [dst] "r" (&ctx->keyschdec32[0][0]), [src] "r" (key)
+ : "memory" );
+ else
+ asm volatile ("movdqu (%[src]), %%xmm0\n\t"
+ "movdqu %%xmm0, (%[dst])\n\t"
+ : /* No output */
+ : [dst] "r" (&ctx->keyschdec32[0][0]), [src] "r" (key)
+ : "memory" );
+
+ vpaes_ssse3_cleanup();
+}
+
+
+/* Make a decryption key from an encryption key. */
+static ASM_FUNC_ATTR_INLINE void
+do_ssse3_prepare_decryption (RIJNDAEL_context *ctx,
+ byte ssse3_state[SSSE3_STATE_SIZE])
+{
+ unsigned int keybits = (ctx->rounds - 10) * 32 + 128;
+
+ vpaes_ssse3_prepare();
+
+ _gcry_aes_ssse3_schedule_core(&ctx->keyschdec32[0][0], keybits,
+ &ctx->keyschdec32[ctx->rounds][0], 1,
+ (keybits == 192) ? 0 : 32);
+
+ vpaes_ssse3_cleanup();
+}
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
+{
+ byte ssse3_state[SSSE3_STATE_SIZE];
+
+ do_ssse3_prepare_decryption(ctx, ssse3_state);
+}
+
+
+/* Encrypt one block using the Intel SSSE3 instructions. Block is input
+* and output through SSE register xmm0. */
+static ASM_FUNC_ATTR_INLINE void
+do_vpaes_ssse3_enc (const RIJNDAEL_context *ctx, unsigned int nrounds)
+{
+ _gcry_aes_ssse3_encrypt_core(ctx->keyschenc32, nrounds);
+}
+
+
+/* Decrypt one block using the Intel SSSE3 instructions. Block is input
+* and output through SSE register xmm0. */
+static ASM_FUNC_ATTR_INLINE void
+do_vpaes_ssse3_dec (const RIJNDAEL_context *ctx, unsigned int nrounds)
+{
+ _gcry_aes_ssse3_decrypt_core(ctx->keyschdec32, nrounds);
+}
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
+ const unsigned char *src)
+{
+ unsigned int nrounds = ctx->rounds;
+ byte ssse3_state[SSSE3_STATE_SIZE];
+
+ vpaes_ssse3_prepare_enc ();
+ asm volatile ("movdqu %[src], %%xmm0\n\t"
+ :
+ : [src] "m" (*src)
+ : "memory" );
+ do_vpaes_ssse3_enc (ctx, nrounds);
+ asm volatile ("movdqu %%xmm0, %[dst]\n\t"
+ : [dst] "=m" (*dst)
+ :
+ : "memory" );
+ vpaes_ssse3_cleanup ();
+ return 0;
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ unsigned int nrounds = ctx->rounds;
+ byte ssse3_state[SSSE3_STATE_SIZE];
+
+ vpaes_ssse3_prepare_enc ();
+
+ asm volatile ("movdqu %[iv], %%xmm0\n\t"
+ : /* No output */
+ : [iv] "m" (*iv)
+ : "memory" );
+
+ for ( ;nblocks; nblocks-- )
+ {
+ do_vpaes_ssse3_enc (ctx, nrounds);
+
+ asm volatile ("movdqu %[inbuf], %%xmm1\n\t"
+ "pxor %%xmm1, %%xmm0\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ : [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
+ }
+
+ asm volatile ("movdqu %%xmm0, %[iv]\n\t"
+ : [iv] "=m" (*iv)
+ :
+ : "memory" );
+
+ vpaes_ssse3_cleanup ();
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks, int cbc_mac)
+{
+ unsigned int nrounds = ctx->rounds;
+ byte ssse3_state[SSSE3_STATE_SIZE];
+
+ vpaes_ssse3_prepare_enc ();
+
+ asm volatile ("movdqu %[iv], %%xmm7\n\t"
+ : /* No output */
+ : [iv] "m" (*iv)
+ : "memory" );
+
+ for ( ;nblocks; nblocks-- )
+ {
+ asm volatile ("movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm7, %%xmm0\n\t"
+ : /* No output */
+ : [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ do_vpaes_ssse3_enc (ctx, nrounds);
+
+ asm volatile ("movdqa %%xmm0, %%xmm7\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ inbuf += BLOCKSIZE;
+ if (!cbc_mac)
+ outbuf += BLOCKSIZE;
+ }
+
+ asm volatile ("movdqu %%xmm7, %[iv]\n\t"
+ : [iv] "=m" (*iv)
+ :
+ : "memory" );
+
+ vpaes_ssse3_cleanup ();
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
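+  /* pshufb with this mask byte-reverses the counter block, so the
+     big-endian counter can be incremented with 64-bit integer ops.  */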
+ unsigned int nrounds = ctx->rounds;
+ byte ssse3_state[SSSE3_STATE_SIZE];
+ u64 ctrlow;
+
+ vpaes_ssse3_prepare_enc ();
+
+ asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */
+ "movdqa (%[ctr]), %%xmm7\n\t" /* Preload CTR */
+ "movq 8(%[ctr]), %q[ctrlow]\n\t"
+ "bswapq %q[ctrlow]\n\t"
+ : [ctrlow] "=r" (ctrlow)
+ : [mask] "m" (*be_mask),
+ [ctr] "r" (ctr)
+ : "memory", "cc");
+
+ for ( ;nblocks; nblocks-- )
+ {
+ asm volatile ("movdqa %%xmm7, %%xmm0\n\t" /* xmm0 := CTR (xmm7) */
+ "pcmpeqd %%xmm1, %%xmm1\n\t"
+ "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */
+
+ "pshufb %%xmm6, %%xmm7\n\t"
+ "psubq %%xmm1, %%xmm7\n\t" /* xmm7++ (big endian) */
+
+ /* detect if 64-bit carry handling is needed */
+ "incq %q[ctrlow]\n\t"
+ "jnz .Lno_carry%=\n\t"
+
+ "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */
+ "psubq %%xmm1, %%xmm7\n\t" /* add carry to upper 64bits */
+
+ ".Lno_carry%=:\n\t"
+
+ "pshufb %%xmm6, %%xmm7\n\t"
+ : [ctrlow] "+r" (ctrlow)
+ :
+ : "cc", "memory");
+
+ do_vpaes_ssse3_enc (ctx, nrounds);
+
+ asm volatile ("movdqu %[src], %%xmm1\n\t" /* xmm1 := input */
+ "pxor %%xmm1, %%xmm0\n\t" /* EncCTR ^= input */
+ "movdqu %%xmm0, %[dst]" /* Store EncCTR. */
+ : [dst] "=m" (*outbuf)
+ : [src] "m" (*inbuf)
+ : "memory");
+
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
+ }
+
+ asm volatile ("movdqu %%xmm7, %[ctr]\n\t" /* Update CTR (mem). */
+ : [ctr] "=m" (*ctr)
+ :
+ : "memory" );
+
+ vpaes_ssse3_cleanup ();
+}
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
+ const unsigned char *src)
+{
+ unsigned int nrounds = ctx->rounds;
+ byte ssse3_state[SSSE3_STATE_SIZE];
+
+ vpaes_ssse3_prepare_dec ();
+ asm volatile ("movdqu %[src], %%xmm0\n\t"
+ :
+ : [src] "m" (*src)
+ : "memory" );
+ do_vpaes_ssse3_dec (ctx, nrounds);
+ asm volatile ("movdqu %%xmm0, %[dst]\n\t"
+ : [dst] "=m" (*dst)
+ :
+ : "memory" );
+ vpaes_ssse3_cleanup ();
+ return 0;
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ unsigned int nrounds = ctx->rounds;
+ byte ssse3_state[SSSE3_STATE_SIZE];
+
+ vpaes_ssse3_prepare_enc ();
+
+ asm volatile ("movdqu %[iv], %%xmm0\n\t"
+ : /* No output */
+ : [iv] "m" (*iv)
+ : "memory" );
+
+ for ( ;nblocks; nblocks-- )
+ {
+ do_vpaes_ssse3_enc (ctx, nrounds);
+
+ asm volatile ("movdqa %%xmm0, %%xmm6\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm6\n\t"
+ "movdqu %%xmm6, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ : [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
+ }
+
+ asm volatile ("movdqu %%xmm0, %[iv]\n\t"
+ : [iv] "=m" (*iv)
+ :
+ : "memory" );
+
+ vpaes_ssse3_cleanup ();
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+ unsigned char *outbuf, const unsigned char *inbuf,
+ size_t nblocks)
+{
+ unsigned int nrounds = ctx->rounds;
+ byte ssse3_state[SSSE3_STATE_SIZE];
+
+ if ( !ctx->decryption_prepared )
+ {
+ do_ssse3_prepare_decryption ( ctx, ssse3_state );
+ ctx->decryption_prepared = 1;
+ }
+
+ vpaes_ssse3_prepare_dec ();
+
+ asm volatile ("movdqu %[iv], %%xmm7\n\t" /* use xmm7 as fast IV storage */
+ : /* No output */
+ : [iv] "m" (*iv)
+ : "memory");
+
+ for ( ;nblocks; nblocks-- )
+ {
+ asm volatile ("movdqu %[inbuf], %%xmm0\n\t"
+ "movdqa %%xmm0, %%xmm6\n\t" /* use xmm6 as savebuf */
+ : /* No output */
+ : [inbuf] "m" (*inbuf)
+ : "memory");
+
+ do_vpaes_ssse3_dec (ctx, nrounds);
+
+ asm volatile ("pxor %%xmm7, %%xmm0\n\t" /* xor IV with output */
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ "movdqu %%xmm6, %%xmm7\n\t" /* store savebuf as new IV */
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory");
+
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
+ }
+
+ asm volatile ("movdqu %%xmm7, %[iv]\n\t" /* store IV */
+ : /* No output */
+ : [iv] "m" (*iv)
+ : "memory");
+
+ vpaes_ssse3_cleanup ();
+}
+
+
+static void ASM_FUNC_ATTR
+ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ u64 n = c->u_mode.ocb.data_nblocks;
+ unsigned int nrounds = ctx->rounds;
+ byte ssse3_state[SSSE3_STATE_SIZE];
+
+ vpaes_ssse3_prepare_enc ();
+
+ /* Preload Offset and Checksum */
+ asm volatile ("movdqu %[iv], %%xmm7\n\t"
+ "movdqu %[ctr], %%xmm6\n\t"
+ : /* No output */
+ : [iv] "m" (*c->u_iv.iv),
+ [ctr] "m" (*c->u_ctr.ctr)
+ : "memory" );
+
+ for ( ;nblocks; nblocks-- )
+ {
+ const unsigned char *l;
+
+ l = aes_ocb_get_l(c, ++n);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm7\n\t"
+ "pxor %%xmm0, %%xmm6\n\t"
+ "pxor %%xmm7, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ do_vpaes_ssse3_enc (ctx, nrounds);
+
+ asm volatile ("pxor %%xmm7, %%xmm0\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+
+ c->u_mode.ocb.data_nblocks = n;
+ asm volatile ("movdqu %%xmm7, %[iv]\n\t"
+ "movdqu %%xmm6, %[ctr]\n\t"
+ : [iv] "=m" (*c->u_iv.iv),
+ [ctr] "=m" (*c->u_ctr.ctr)
+ :
+ : "memory" );
+
+ vpaes_ssse3_cleanup ();
+}
+
+static void ASM_FUNC_ATTR
+ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ u64 n = c->u_mode.ocb.data_nblocks;
+ unsigned int nrounds = ctx->rounds;
+ byte ssse3_state[SSSE3_STATE_SIZE];
+
+ if ( !ctx->decryption_prepared )
+ {
+ do_ssse3_prepare_decryption ( ctx, ssse3_state );
+ ctx->decryption_prepared = 1;
+ }
+
+ vpaes_ssse3_prepare_dec ();
+
+ /* Preload Offset and Checksum */
+ asm volatile ("movdqu %[iv], %%xmm7\n\t"
+ "movdqu %[ctr], %%xmm6\n\t"
+ : /* No output */
+ : [iv] "m" (*c->u_iv.iv),
+ [ctr] "m" (*c->u_ctr.ctr)
+ : "memory" );
+
+ for ( ;nblocks; nblocks-- )
+ {
+ const unsigned char *l;
+
+ l = aes_ocb_get_l(c, ++n);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm7\n\t"
+ "pxor %%xmm7, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ do_vpaes_ssse3_dec (ctx, nrounds);
+
+ asm volatile ("pxor %%xmm7, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm6\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+
+ c->u_mode.ocb.data_nblocks = n;
+ asm volatile ("movdqu %%xmm7, %[iv]\n\t"
+ "movdqu %%xmm6, %[ctr]\n\t"
+ : [iv] "=m" (*c->u_iv.iv),
+ [ctr] "=m" (*c->u_ctr.ctr)
+ :
+ : "memory" );
+
+ vpaes_ssse3_cleanup ();
+}
+
+
+size_t ASM_FUNC_ATTR
+_gcry_aes_ssse3_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+ if (encrypt)
+ ssse3_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks);
+ else
+ ssse3_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks);
+
+ return 0;
+}
+
+
+size_t ASM_FUNC_ATTR
+_gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ const unsigned char *abuf = abuf_arg;
+ u64 n = c->u_mode.ocb.aad_nblocks;
+ unsigned int nrounds = ctx->rounds;
+ byte ssse3_state[SSSE3_STATE_SIZE];
+
+ vpaes_ssse3_prepare_enc ();
+
+ /* Preload Offset and Sum */
+ asm volatile ("movdqu %[iv], %%xmm7\n\t"
+ "movdqu %[ctr], %%xmm6\n\t"
+ : /* No output */
+ : [iv] "m" (*c->u_mode.ocb.aad_offset),
+ [ctr] "m" (*c->u_mode.ocb.aad_sum)
+ : "memory" );
+
+ for ( ;nblocks; nblocks-- )
+ {
+ const unsigned char *l;
+
+ l = aes_ocb_get_l(c, ++n);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[abuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm7\n\t"
+ "pxor %%xmm7, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [abuf] "m" (*abuf)
+ : "memory" );
+
+ do_vpaes_ssse3_enc (ctx, nrounds);
+
+ asm volatile ("pxor %%xmm0, %%xmm6\n\t"
+ :
+ :
+ : "memory" );
+
+ abuf += BLOCKSIZE;
+ }
+
+ c->u_mode.ocb.aad_nblocks = n;
+ asm volatile ("movdqu %%xmm7, %[iv]\n\t"
+ "movdqu %%xmm6, %[ctr]\n\t"
+ : [iv] "=m" (*c->u_mode.ocb.aad_offset),
+ [ctr] "=m" (*c->u_mode.ocb.aad_sum)
+ :
+ : "memory" );
+
+ vpaes_ssse3_cleanup ();
+
+ return 0;
+}
+
+#if __clang__
+# pragma clang attribute pop
+#endif
+
+#endif /* USE_SSSE3 */
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-tables.h b/comm/third_party/libgcrypt/cipher/rijndael-tables.h
new file mode 100644
index 0000000000..b54d959393
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-tables.h
@@ -0,0 +1,227 @@
+/* rijndael-tables.h - Rijndael (AES) for GnuPG,
+ * Copyright (C) 2000, 2001, 2002, 2003, 2007,
+ * 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* To keep the actual implementation at a readable size we use this
+ include file to define the tables. */
+
+static struct
+{
+ volatile u32 counter_head;
+ u32 cacheline_align[64 / 4 - 1];
+ u32 T[256];
+ volatile u32 counter_tail;
+} enc_tables ATTR_ALIGNED_64 =
+ {
+ 0,
+ { 0, },
+ {
+ 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6,
+ 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
+ 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56,
+ 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec,
+ 0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa,
+ 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
+ 0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45,
+ 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b,
+ 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c,
+ 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83,
+ 0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9,
+ 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
+ 0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d,
+ 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f,
+ 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df,
+ 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea,
+ 0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34,
+ 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
+ 0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d,
+ 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413,
+ 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1,
+ 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6,
+ 0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972,
+ 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
+ 0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed,
+ 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511,
+ 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe,
+ 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b,
+ 0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05,
+ 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
+ 0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142,
+ 0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf,
+ 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3,
+ 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e,
+ 0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a,
+ 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
+ 0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3,
+ 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b,
+ 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428,
+ 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad,
+ 0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14,
+ 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
+ 0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4,
+ 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2,
+ 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda,
+ 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949,
+ 0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf,
+ 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
+ 0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c,
+ 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697,
+ 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e,
+ 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f,
+ 0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc,
+ 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
+ 0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969,
+ 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27,
+ 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122,
+ 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433,
+ 0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9,
+ 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
+ 0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a,
+ 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0,
+ 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e,
+ 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c
+ },
+ 0
+ };
+
+#define encT enc_tables.T
+
+static struct
+{
+ volatile u32 counter_head;
+ u32 cacheline_align[64 / 4 - 1];
+ u32 T[256];
+ byte inv_sbox[256];
+ volatile u32 counter_tail;
+} dec_tables ATTR_ALIGNED_64 =
+ {
+ 0,
+ { 0, },
+ {
+ 0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a,
+ 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b,
+ 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5,
+ 0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5,
+ 0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d,
+ 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b,
+ 0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295,
+ 0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e,
+ 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927,
+ 0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d,
+ 0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362,
+ 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9,
+ 0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52,
+ 0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566,
+ 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3,
+ 0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed,
+ 0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e,
+ 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4,
+ 0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4,
+ 0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd,
+ 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d,
+ 0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060,
+ 0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967,
+ 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879,
+ 0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000,
+ 0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c,
+ 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36,
+ 0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624,
+ 0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b,
+ 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c,
+ 0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12,
+ 0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14,
+ 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3,
+ 0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b,
+ 0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8,
+ 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684,
+ 0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7,
+ 0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177,
+ 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947,
+ 0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322,
+ 0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498,
+ 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f,
+ 0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54,
+ 0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382,
+ 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf,
+ 0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb,
+ 0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83,
+ 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef,
+ 0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029,
+ 0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235,
+ 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733,
+ 0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117,
+ 0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4,
+ 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546,
+ 0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb,
+ 0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d,
+ 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb,
+ 0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a,
+ 0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773,
+ 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478,
+ 0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2,
+ 0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff,
+ 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664,
+ 0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0
+ },
+ {
+ 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
+ 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
+ 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
+ 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
+ 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
+ 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
+ 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
+ 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
+ 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
+ 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
+ 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
+ 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
+ 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
+ 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
+ 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
+ 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
+ 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
+ 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
+ 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
+ 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
+ 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
+ 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
+ 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
+ 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
+ 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
+ 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
+ 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
+ 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
+ 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
+ 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
+ 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
+ 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+ },
+ 0
+ };
+
+#define decT dec_tables.T
+#define inv_sbox dec_tables.inv_sbox
+
+static const u32 rcon[30] =
+ {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c,
+ 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35,
+ 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91
+ };
diff --git a/comm/third_party/libgcrypt/cipher/rijndael.c b/comm/third_party/libgcrypt/cipher/rijndael.c
new file mode 100644
index 0000000000..fe137327e7
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael.c
@@ -0,0 +1,2032 @@
+/* Rijndael (AES) for GnuPG
+ * Copyright (C) 2000, 2001, 2002, 2003, 2007,
+ * 2008, 2011, 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *******************************************************************
+ * The code here is based on the optimized implementation taken from
+ * http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ on Oct 2, 2000,
+ * which carries this notice:
+ *------------------------------------------
+ * rijndael-alg-fst.c v2.3 April '2000
+ *
+ * Optimised ANSI C code
+ *
+ * authors: v1.0: Antoon Bosselaers
+ * v2.0: Vincent Rijmen
+ * v2.3: Paulo Barreto
+ *
+ * This code is placed in the public domain.
+ *------------------------------------------
+ *
+ * The SP800-38a document is available at:
+ * http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for memcmp() */
+
+#include "types.h" /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "rijndael-internal.h"
+#include "./cipher-internal.h"
+
+
+#ifdef USE_AMD64_ASM
+/* AMD64 assembly implementations of AES */
+extern unsigned int _gcry_aes_amd64_encrypt_block(const void *keysched_enc,
+ unsigned char *out,
+ const unsigned char *in,
+ int rounds,
+ const void *encT);
+
+extern unsigned int _gcry_aes_amd64_decrypt_block(const void *keysched_dec,
+ unsigned char *out,
+ const unsigned char *in,
+ int rounds,
+ const void *decT);
+#endif /*USE_AMD64_ASM*/
+
+#ifdef USE_AESNI
+/* AES-NI (AMD64 & i386) accelerated implementations of AES */
+extern void _gcry_aes_aesni_do_setkey(RIJNDAEL_context *ctx, const byte *key);
+extern void _gcry_aes_aesni_prepare_decryption(RIJNDAEL_context *ctx);
+
+extern unsigned int _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+extern unsigned int _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+extern void _gcry_aes_aesni_cfb_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_aesni_cbc_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int cbc_mac);
+extern void _gcry_aes_aesni_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_aesni_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_aesni_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern size_t _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
+extern size_t _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_aesni_xts_crypt (void *context, unsigned char *tweak,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int encrypt);
+#endif
+
+#ifdef USE_SSSE3
+/* SSSE3 (AMD64) vector permutation implementation of AES */
+extern void _gcry_aes_ssse3_do_setkey(RIJNDAEL_context *ctx, const byte *key);
+extern void _gcry_aes_ssse3_prepare_decryption(RIJNDAEL_context *ctx);
+
+extern unsigned int _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+extern unsigned int _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+extern void _gcry_aes_ssse3_cfb_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ssse3_cbc_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks,
+ int cbc_mac);
+extern void _gcry_aes_ssse3_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ssse3_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ssse3_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern size_t _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
+extern size_t _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks);
+#endif
+
+#ifdef USE_PADLOCK
+extern unsigned int _gcry_aes_padlock_encrypt (const RIJNDAEL_context *ctx,
+ unsigned char *bx,
+ const unsigned char *ax);
+extern unsigned int _gcry_aes_padlock_decrypt (const RIJNDAEL_context *ctx,
+ unsigned char *bx,
+ const unsigned char *ax);
+extern void _gcry_aes_padlock_prepare_decryption (RIJNDAEL_context *ctx);
+#endif
+
+#ifdef USE_ARM_ASM
+/* ARM assembly implementations of AES */
+extern unsigned int _gcry_aes_arm_encrypt_block(const void *keysched_enc,
+ unsigned char *out,
+ const unsigned char *in,
+ int rounds,
+ const void *encT);
+
+extern unsigned int _gcry_aes_arm_decrypt_block(const void *keysched_dec,
+ unsigned char *out,
+ const unsigned char *in,
+ int rounds,
+ const void *decT);
+#endif /*USE_ARM_ASM*/
+
+#ifdef USE_ARM_CE
+/* ARMv8 Crypto Extension implementations of AES */
+extern void _gcry_aes_armv8_ce_setkey(RIJNDAEL_context *ctx, const byte *key);
+extern void _gcry_aes_armv8_ce_prepare_decryption(RIJNDAEL_context *ctx);
+
+extern unsigned int _gcry_aes_armv8_ce_encrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+extern unsigned int _gcry_aes_armv8_ce_decrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+
+extern void _gcry_aes_armv8_ce_cfb_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_armv8_ce_cbc_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks,
+ int cbc_mac);
+extern void _gcry_aes_armv8_ce_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_armv8_ce_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_armv8_ce_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern size_t _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
+extern size_t _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c,
+ const void *abuf_arg, size_t nblocks);
+extern void _gcry_aes_armv8_ce_xts_crypt (void *context, unsigned char *tweak,
+ void *outbuf_arg,
+ const void *inbuf_arg,
+ size_t nblocks, int encrypt);
+#endif /*USE_ARM_CE*/
+
+#ifdef USE_PPC_CRYPTO
+/* PowerPC Crypto implementations of AES */
+extern void _gcry_aes_ppc8_setkey(RIJNDAEL_context *ctx, const byte *key);
+extern void _gcry_aes_ppc8_prepare_decryption(RIJNDAEL_context *ctx);
+
+extern unsigned int _gcry_aes_ppc8_encrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+extern unsigned int _gcry_aes_ppc8_decrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+
+extern void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int cbc_mac);
+extern void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+
+extern size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
+extern size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c,
+ const void *abuf_arg, size_t nblocks);
+
+extern void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak,
+ void *outbuf_arg,
+ const void *inbuf_arg,
+ size_t nblocks, int encrypt);
+#endif /*USE_PPC_CRYPTO*/
+
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+/* Power9 little-endian crypto implementations of AES */
+extern unsigned int _gcry_aes_ppc9le_encrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+extern unsigned int _gcry_aes_ppc9le_decrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+
+extern void _gcry_aes_ppc9le_cfb_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ppc9le_cbc_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int cbc_mac);
+extern void _gcry_aes_ppc9le_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ppc9le_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ppc9le_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+
+extern size_t _gcry_aes_ppc9le_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
+extern size_t _gcry_aes_ppc9le_ocb_auth (gcry_cipher_hd_t c,
+ const void *abuf_arg, size_t nblocks);
+
+extern void _gcry_aes_ppc9le_xts_crypt (void *context, unsigned char *tweak,
+ void *outbuf_arg,
+ const void *inbuf_arg,
+ size_t nblocks, int encrypt);
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
+
+#ifdef USE_S390X_CRYPTO
+/* zSeries crypto implementations of AES */
+extern int _gcry_aes_s390x_setup_acceleration(RIJNDAEL_context *ctx,
+ unsigned int keylen,
+ unsigned int hwfeatures,
+ cipher_bulk_ops_t *bulk_ops);
+extern void _gcry_aes_s390x_setkey(RIJNDAEL_context *ctx, const byte *key);
+extern void _gcry_aes_s390x_prepare_decryption(RIJNDAEL_context *ctx);
+
+extern unsigned int _gcry_aes_s390x_encrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+extern unsigned int _gcry_aes_s390x_decrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+
+#endif /*USE_S390X_CRYPTO*/
+
+static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
+ const unsigned char *ax);
+static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
+ const unsigned char *ax);
+
+static void _gcry_aes_cfb_enc (void *context, unsigned char *iv,
+ void *outbuf, const void *inbuf,
+ size_t nblocks);
+static void _gcry_aes_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static void _gcry_aes_cbc_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int cbc_mac);
+static void _gcry_aes_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static void _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static size_t _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
+static size_t _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks);
+static void _gcry_aes_xts_crypt (void *context, unsigned char *tweak,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int encrypt);
+
+
+/* All the numbers. */
+#include "rijndael-tables.h"
+
+
+
+
+/* Function prototypes. */
+static const char *selftest(void);
+static void prepare_decryption(RIJNDAEL_context *ctx);
+
+
+
+/* Prefetching for encryption/decryption tables. */
+static inline void prefetch_table(const volatile byte *tab, size_t len)
+{
+ size_t i;
+
+ for (i = 0; len - i >= 8 * 32; i += 8 * 32)
+ {
+ (void)tab[i + 0 * 32];
+ (void)tab[i + 1 * 32];
+ (void)tab[i + 2 * 32];
+ (void)tab[i + 3 * 32];
+ (void)tab[i + 4 * 32];
+ (void)tab[i + 5 * 32];
+ (void)tab[i + 6 * 32];
+ (void)tab[i + 7 * 32];
+ }
+ for (; i < len; i += 32)
+ {
+ (void)tab[i];
+ }
+
+ (void)tab[len - 1];
+}
+
+static void prefetch_enc(void)
+{
+  /* Modify counters to trigger copy-on-write and unsharing if physical pages
+   * of the look-up table are shared between processes.  Modifying the counters
+   * also causes the page checksums to change, hinting to the same-page merging
+   * algorithm that these pages are frequently changing.  */
+ enc_tables.counter_head++;
+ enc_tables.counter_tail++;
+
+ /* Prefetch look-up tables to cache. */
+ prefetch_table((const void *)&enc_tables, sizeof(enc_tables));
+}
+
+static void prefetch_dec(void)
+{
+  /* Modify counters to trigger copy-on-write and unsharing if physical pages
+   * of the look-up table are shared between processes.  Modifying the counters
+   * also causes the page checksums to change, hinting to the same-page merging
+   * algorithm that these pages are frequently changing.  */
+ dec_tables.counter_head++;
+ dec_tables.counter_tail++;
+
+ /* Prefetch look-up tables to cache. */
+ prefetch_table((const void *)&dec_tables, sizeof(dec_tables));
+}
+
+
+
+/* Perform the key setup. */
+static gcry_err_code_t
+do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ static int initialized = 0;
+ static const char *selftest_failed = 0;
+ void (*hw_setkey)(RIJNDAEL_context *ctx, const byte *key) = NULL;
+ int rounds;
+ int i,j, r, t, rconpointer = 0;
+ int KC;
+ unsigned int hwfeatures;
+
+ /* The on-the-fly self tests are only run in non-fips mode. In fips
+ mode explicit self-tests are required. Actually the on-the-fly
+ self-tests are not fully thread-safe and it might happen that a
+ failed self-test won't get noticed in another thread.
+
+ FIXME: We might want to have a central registry of succeeded
+ self-tests. */
+ if (!fips_mode () && !initialized)
+ {
+ initialized = 1;
+ selftest_failed = selftest ();
+ if (selftest_failed)
+ log_error ("%s\n", selftest_failed );
+ }
+ if (selftest_failed)
+ return GPG_ERR_SELFTEST_FAILED;
+
+ if( keylen == 128/8 )
+ {
+ rounds = 10;
+ KC = 4;
+ }
+ else if ( keylen == 192/8 )
+ {
+ rounds = 12;
+ KC = 6;
+ }
+ else if ( keylen == 256/8 )
+ {
+ rounds = 14;
+ KC = 8;
+ }
+ else
+ return GPG_ERR_INV_KEYLEN;
+
+ ctx->rounds = rounds;
+ hwfeatures = _gcry_get_hw_features ();
+
+ ctx->decryption_prepared = 0;
+
+ /* Setup default bulk encryption routines. */
+ memset (bulk_ops, 0, sizeof(*bulk_ops));
+ bulk_ops->cfb_enc = _gcry_aes_cfb_enc;
+ bulk_ops->cfb_dec = _gcry_aes_cfb_dec;
+ bulk_ops->cbc_enc = _gcry_aes_cbc_enc;
+ bulk_ops->cbc_dec = _gcry_aes_cbc_dec;
+ bulk_ops->ctr_enc = _gcry_aes_ctr_enc;
+ bulk_ops->ocb_crypt = _gcry_aes_ocb_crypt;
+ bulk_ops->ocb_auth = _gcry_aes_ocb_auth;
+ bulk_ops->xts_crypt = _gcry_aes_xts_crypt;
+
+ (void)hwfeatures;
+
+ if (0)
+ {
+ ;
+ }
+#ifdef USE_AESNI
+ else if (hwfeatures & HWF_INTEL_AESNI)
+ {
+ hw_setkey = _gcry_aes_aesni_do_setkey;
+ ctx->encrypt_fn = _gcry_aes_aesni_encrypt;
+ ctx->decrypt_fn = _gcry_aes_aesni_decrypt;
+ ctx->prefetch_enc_fn = NULL;
+ ctx->prefetch_dec_fn = NULL;
+ ctx->prepare_decryption = _gcry_aes_aesni_prepare_decryption;
+ ctx->use_avx = !!(hwfeatures & HWF_INTEL_AVX);
+ ctx->use_avx2 = !!(hwfeatures & HWF_INTEL_AVX2);
+
+ /* Setup AES-NI bulk encryption routines. */
+ bulk_ops->cfb_enc = _gcry_aes_aesni_cfb_enc;
+ bulk_ops->cfb_dec = _gcry_aes_aesni_cfb_dec;
+ bulk_ops->cbc_enc = _gcry_aes_aesni_cbc_enc;
+ bulk_ops->cbc_dec = _gcry_aes_aesni_cbc_dec;
+ bulk_ops->ctr_enc = _gcry_aes_aesni_ctr_enc;
+ bulk_ops->ocb_crypt = _gcry_aes_aesni_ocb_crypt;
+ bulk_ops->ocb_auth = _gcry_aes_aesni_ocb_auth;
+ bulk_ops->xts_crypt = _gcry_aes_aesni_xts_crypt;
+ }
+#endif
+#ifdef USE_PADLOCK
+ else if (hwfeatures & HWF_PADLOCK_AES && keylen == 128/8)
+ {
+ ctx->encrypt_fn = _gcry_aes_padlock_encrypt;
+ ctx->decrypt_fn = _gcry_aes_padlock_decrypt;
+ ctx->prefetch_enc_fn = NULL;
+ ctx->prefetch_dec_fn = NULL;
+ ctx->prepare_decryption = _gcry_aes_padlock_prepare_decryption;
+ memcpy (ctx->padlockkey, key, keylen);
+ }
+#endif
+#ifdef USE_SSSE3
+ else if (hwfeatures & HWF_INTEL_SSSE3)
+ {
+ hw_setkey = _gcry_aes_ssse3_do_setkey;
+ ctx->encrypt_fn = _gcry_aes_ssse3_encrypt;
+ ctx->decrypt_fn = _gcry_aes_ssse3_decrypt;
+ ctx->prefetch_enc_fn = NULL;
+ ctx->prefetch_dec_fn = NULL;
+ ctx->prepare_decryption = _gcry_aes_ssse3_prepare_decryption;
+
+ /* Setup SSSE3 bulk encryption routines. */
+ bulk_ops->cfb_enc = _gcry_aes_ssse3_cfb_enc;
+ bulk_ops->cfb_dec = _gcry_aes_ssse3_cfb_dec;
+ bulk_ops->cbc_enc = _gcry_aes_ssse3_cbc_enc;
+ bulk_ops->cbc_dec = _gcry_aes_ssse3_cbc_dec;
+ bulk_ops->ctr_enc = _gcry_aes_ssse3_ctr_enc;
+ bulk_ops->ocb_crypt = _gcry_aes_ssse3_ocb_crypt;
+ bulk_ops->ocb_auth = _gcry_aes_ssse3_ocb_auth;
+ }
+#endif
+#ifdef USE_ARM_CE
+ else if (hwfeatures & HWF_ARM_AES)
+ {
+ hw_setkey = _gcry_aes_armv8_ce_setkey;
+ ctx->encrypt_fn = _gcry_aes_armv8_ce_encrypt;
+ ctx->decrypt_fn = _gcry_aes_armv8_ce_decrypt;
+ ctx->prefetch_enc_fn = NULL;
+ ctx->prefetch_dec_fn = NULL;
+ ctx->prepare_decryption = _gcry_aes_armv8_ce_prepare_decryption;
+
+ /* Setup ARM-CE bulk encryption routines. */
+ bulk_ops->cfb_enc = _gcry_aes_armv8_ce_cfb_enc;
+ bulk_ops->cfb_dec = _gcry_aes_armv8_ce_cfb_dec;
+ bulk_ops->cbc_enc = _gcry_aes_armv8_ce_cbc_enc;
+ bulk_ops->cbc_dec = _gcry_aes_armv8_ce_cbc_dec;
+ bulk_ops->ctr_enc = _gcry_aes_armv8_ce_ctr_enc;
+ bulk_ops->ocb_crypt = _gcry_aes_armv8_ce_ocb_crypt;
+ bulk_ops->ocb_auth = _gcry_aes_armv8_ce_ocb_auth;
+ bulk_ops->xts_crypt = _gcry_aes_armv8_ce_xts_crypt;
+ }
+#endif
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+ else if ((hwfeatures & HWF_PPC_VCRYPTO) && (hwfeatures & HWF_PPC_ARCH_3_00))
+ {
+ hw_setkey = _gcry_aes_ppc8_setkey;
+ ctx->encrypt_fn = _gcry_aes_ppc9le_encrypt;
+ ctx->decrypt_fn = _gcry_aes_ppc9le_decrypt;
+ ctx->prefetch_enc_fn = NULL;
+ ctx->prefetch_dec_fn = NULL;
+ ctx->prepare_decryption = _gcry_aes_ppc8_prepare_decryption;
+
+ /* Setup PPC9LE bulk encryption routines. */
+ bulk_ops->cfb_enc = _gcry_aes_ppc9le_cfb_enc;
+ bulk_ops->cfb_dec = _gcry_aes_ppc9le_cfb_dec;
+ bulk_ops->cbc_enc = _gcry_aes_ppc9le_cbc_enc;
+ bulk_ops->cbc_dec = _gcry_aes_ppc9le_cbc_dec;
+ bulk_ops->ctr_enc = _gcry_aes_ppc9le_ctr_enc;
+ bulk_ops->ocb_crypt = _gcry_aes_ppc9le_ocb_crypt;
+ bulk_ops->ocb_auth = _gcry_aes_ppc9le_ocb_auth;
+ bulk_ops->xts_crypt = _gcry_aes_ppc9le_xts_crypt;
+ }
+#endif
+#ifdef USE_PPC_CRYPTO
+ else if (hwfeatures & HWF_PPC_VCRYPTO)
+ {
+ hw_setkey = _gcry_aes_ppc8_setkey;
+ ctx->encrypt_fn = _gcry_aes_ppc8_encrypt;
+ ctx->decrypt_fn = _gcry_aes_ppc8_decrypt;
+ ctx->prefetch_enc_fn = NULL;
+ ctx->prefetch_dec_fn = NULL;
+ ctx->prepare_decryption = _gcry_aes_ppc8_prepare_decryption;
+
+ /* Setup PPC8 bulk encryption routines. */
+ bulk_ops->cfb_enc = _gcry_aes_ppc8_cfb_enc;
+ bulk_ops->cfb_dec = _gcry_aes_ppc8_cfb_dec;
+ bulk_ops->cbc_enc = _gcry_aes_ppc8_cbc_enc;
+ bulk_ops->cbc_dec = _gcry_aes_ppc8_cbc_dec;
+ bulk_ops->ctr_enc = _gcry_aes_ppc8_ctr_enc;
+ bulk_ops->ocb_crypt = _gcry_aes_ppc8_ocb_crypt;
+ bulk_ops->ocb_auth = _gcry_aes_ppc8_ocb_auth;
+ bulk_ops->xts_crypt = _gcry_aes_ppc8_xts_crypt;
+ }
+#endif
+#ifdef USE_S390X_CRYPTO
+ else if (_gcry_aes_s390x_setup_acceleration (ctx, keylen, hwfeatures,
+ bulk_ops))
+ {
+ hw_setkey = _gcry_aes_s390x_setkey;
+ ctx->encrypt_fn = _gcry_aes_s390x_encrypt;
+ ctx->decrypt_fn = _gcry_aes_s390x_decrypt;
+ ctx->prefetch_enc_fn = NULL;
+ ctx->prefetch_dec_fn = NULL;
+ ctx->prepare_decryption = _gcry_aes_s390x_prepare_decryption;
+ }
+#endif
+ else
+ {
+ ctx->encrypt_fn = do_encrypt;
+ ctx->decrypt_fn = do_decrypt;
+ ctx->prefetch_enc_fn = prefetch_enc;
+ ctx->prefetch_dec_fn = prefetch_dec;
+ ctx->prepare_decryption = prepare_decryption;
+ }
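+  /* Note: when none of the accelerated paths above is selected, the
+     generic table-driven implementation (or the plain assembly variant)
+     is used; its prefetch functions touch the lookup tables up front,
+     presumably to reduce cache-timing variation of the table lookups.  */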
+
+ /* NB: We don't yet support Padlock hardware key generation. */
+
+ if (hw_setkey)
+ {
+ hw_setkey (ctx, key);
+ }
+ else
+ {
+ const byte *sbox = ((const byte *)encT) + 1;
+ union
+ {
+ PROPERLY_ALIGNED_TYPE dummy;
+ byte data[MAXKC][4];
+ u32 data32[MAXKC];
+ } tkk[2];
+#define k tkk[0].data
+#define k_u32 tkk[0].data32
+#define tk tkk[1].data
+#define tk_u32 tkk[1].data32
+#define W (ctx->keyschenc)
+#define W_u32 (ctx->keyschenc32)
+
+ prefetch_enc();
+
+ for (i = 0; i < keylen; i++)
+ {
+ k[i >> 2][i & 3] = key[i];
+ }
+
+ for (j = KC-1; j >= 0; j--)
+ {
+ tk_u32[j] = k_u32[j];
+ }
+ r = 0;
+ t = 0;
+ /* Copy values into round key array. */
+ for (j = 0; (j < KC) && (r < rounds + 1); )
+ {
+ for (; (j < KC) && (t < 4); j++, t++)
+ {
+ W_u32[r][t] = le_bswap32(tk_u32[j]);
+ }
+ if (t == 4)
+ {
+ r++;
+ t = 0;
+ }
+ }
+
+ while (r < rounds + 1)
+ {
+          /* While not enough round key material has been calculated,
+             calculate new values.  */
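+          /* This is the usual FIPS-197 key expansion core: the last key
+             word (tk[KC-1]) is rotated (RotWord) and passed through the
+             S-box (SubWord), then XORed with the round constant.  The
+             S-box bytes are read out of the combined encT table, which
+             is why the index is multiplied by 4; the KC == 8 case below
+             adds the extra SubWord step that AES-256 requires.  */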
+ tk[0][0] ^= sbox[tk[KC-1][1] * 4];
+ tk[0][1] ^= sbox[tk[KC-1][2] * 4];
+ tk[0][2] ^= sbox[tk[KC-1][3] * 4];
+ tk[0][3] ^= sbox[tk[KC-1][0] * 4];
+ tk[0][0] ^= rcon[rconpointer++];
+
+ if (KC != 8)
+ {
+ for (j = 1; j < KC; j++)
+ {
+ tk_u32[j] ^= tk_u32[j-1];
+ }
+ }
+ else
+ {
+ for (j = 1; j < KC/2; j++)
+ {
+ tk_u32[j] ^= tk_u32[j-1];
+ }
+ tk[KC/2][0] ^= sbox[tk[KC/2 - 1][0] * 4];
+ tk[KC/2][1] ^= sbox[tk[KC/2 - 1][1] * 4];
+ tk[KC/2][2] ^= sbox[tk[KC/2 - 1][2] * 4];
+ tk[KC/2][3] ^= sbox[tk[KC/2 - 1][3] * 4];
+ for (j = KC/2 + 1; j < KC; j++)
+ {
+ tk_u32[j] ^= tk_u32[j-1];
+ }
+ }
+
+ /* Copy values into round key array. */
+ for (j = 0; (j < KC) && (r < rounds + 1); )
+ {
+ for (; (j < KC) && (t < 4); j++, t++)
+ {
+ W_u32[r][t] = le_bswap32(tk_u32[j]);
+ }
+ if (t == 4)
+ {
+ r++;
+ t = 0;
+ }
+ }
+ }
+#undef W
+#undef tk
+#undef k
+#undef W_u32
+#undef tk_u32
+#undef k_u32
+ wipememory(&tkk, sizeof(tkk));
+ }
+
+ return 0;
+}
+
+
+static gcry_err_code_t
+rijndael_setkey (void *context, const byte *key, const unsigned keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ RIJNDAEL_context *ctx = context;
+ return do_setkey (ctx, key, keylen, bulk_ops);
+}
+
+
+/* Make a decryption key from an encryption key. */
+static void
+prepare_decryption( RIJNDAEL_context *ctx )
+{
+ const byte *sbox = ((const byte *)encT) + 1;
+ int r;
+
+ prefetch_enc();
+ prefetch_dec();
+
+ ctx->keyschdec32[0][0] = ctx->keyschenc32[0][0];
+ ctx->keyschdec32[0][1] = ctx->keyschenc32[0][1];
+ ctx->keyschdec32[0][2] = ctx->keyschenc32[0][2];
+ ctx->keyschdec32[0][3] = ctx->keyschenc32[0][3];
+
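+  /* The inner round keys are effectively run through InvMixColumns so
+     that decryption can reuse the same table-lookup round structure as
+     encryption (the FIPS-197 "equivalent inverse cipher").  decT
+     combines the inverse S-box with InvMixColumns, so indexing it with
+     sbox[x] cancels the inverse S-box and leaves just InvMixColumns
+     applied to each key byte.  */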
+ for (r = 1; r < ctx->rounds; r++)
+ {
+ u32 *wi = ctx->keyschenc32[r];
+ u32 *wo = ctx->keyschdec32[r];
+ u32 wt;
+
+ wt = wi[0];
+ wo[0] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0)
+ ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1)
+ ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2)
+ ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3);
+
+ wt = wi[1];
+ wo[1] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0)
+ ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1)
+ ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2)
+ ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3);
+
+ wt = wi[2];
+ wo[2] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0)
+ ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1)
+ ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2)
+ ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3);
+
+ wt = wi[3];
+ wo[3] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0)
+ ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1)
+ ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2)
+ ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3);
+ }
+
+ ctx->keyschdec32[r][0] = ctx->keyschenc32[r][0];
+ ctx->keyschdec32[r][1] = ctx->keyschenc32[r][1];
+ ctx->keyschdec32[r][2] = ctx->keyschenc32[r][2];
+ ctx->keyschdec32[r][3] = ctx->keyschenc32[r][3];
+}
+
+
+#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM)
+/* Encrypt one block. A and B may be the same. */
+static unsigned int
+do_encrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b,
+ const unsigned char *a)
+{
+#define rk (ctx->keyschenc32)
+ const byte *sbox = ((const byte *)encT) + 1;
+ int rounds = ctx->rounds;
+ int r;
+ u32 sa[4];
+ u32 sb[4];
+
+ sb[0] = buf_get_le32(a + 0);
+ sb[1] = buf_get_le32(a + 4);
+ sb[2] = buf_get_le32(a + 8);
+ sb[3] = buf_get_le32(a + 12);
+
+ sa[0] = sb[0] ^ rk[0][0];
+ sa[1] = sb[1] ^ rk[0][1];
+ sa[2] = sb[2] ^ rk[0][2];
+ sa[3] = sb[3] ^ rk[0][3];
+
+ sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
+ sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
+ sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
+ sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
+ sa[0] = rk[1][0] ^ sb[0];
+
+ sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
+ sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
+ sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
+ sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
+ sa[1] = rk[1][1] ^ sb[1];
+
+ sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
+ sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
+ sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
+ sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
+ sa[2] = rk[1][2] ^ sb[2];
+
+ sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
+ sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
+ sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
+ sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
+ sa[3] = rk[1][3] ^ sb[3];
+
+ for (r = 2; r < rounds; r++)
+ {
+ sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
+ sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
+ sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
+ sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
+ sa[0] = rk[r][0] ^ sb[0];
+
+ sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
+ sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
+ sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
+ sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
+ sa[1] = rk[r][1] ^ sb[1];
+
+ sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
+ sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
+ sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
+ sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
+ sa[2] = rk[r][2] ^ sb[2];
+
+ sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
+ sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
+ sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
+ sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
+ sa[3] = rk[r][3] ^ sb[3];
+
+ r++;
+
+ sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
+ sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
+ sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
+ sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
+ sa[0] = rk[r][0] ^ sb[0];
+
+ sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
+ sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
+ sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
+ sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
+ sa[1] = rk[r][1] ^ sb[1];
+
+ sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
+ sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
+ sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
+ sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
+ sa[2] = rk[r][2] ^ sb[2];
+
+ sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
+ sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
+ sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
+ sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
+ sa[3] = rk[r][3] ^ sb[3];
+ }
+
+ /* Last round is special. */
+
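+  /* The final round omits MixColumns, so plain S-box bytes are
+     assembled and shifted into place here instead of using the
+     combined encT lookups.  */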
+ sb[0] = ((u32)sbox[(byte)(sa[0] >> (0 * 8)) * 4]) << (0 * 8);
+ sb[3] = ((u32)sbox[(byte)(sa[0] >> (1 * 8)) * 4]) << (1 * 8);
+ sb[2] = ((u32)sbox[(byte)(sa[0] >> (2 * 8)) * 4]) << (2 * 8);
+ sb[1] = ((u32)sbox[(byte)(sa[0] >> (3 * 8)) * 4]) << (3 * 8);
+ sa[0] = rk[r][0] ^ sb[0];
+
+ sb[1] ^= ((u32)sbox[(byte)(sa[1] >> (0 * 8)) * 4]) << (0 * 8);
+ sa[0] ^= ((u32)sbox[(byte)(sa[1] >> (1 * 8)) * 4]) << (1 * 8);
+ sb[3] ^= ((u32)sbox[(byte)(sa[1] >> (2 * 8)) * 4]) << (2 * 8);
+ sb[2] ^= ((u32)sbox[(byte)(sa[1] >> (3 * 8)) * 4]) << (3 * 8);
+ sa[1] = rk[r][1] ^ sb[1];
+
+ sb[2] ^= ((u32)sbox[(byte)(sa[2] >> (0 * 8)) * 4]) << (0 * 8);
+ sa[1] ^= ((u32)sbox[(byte)(sa[2] >> (1 * 8)) * 4]) << (1 * 8);
+ sa[0] ^= ((u32)sbox[(byte)(sa[2] >> (2 * 8)) * 4]) << (2 * 8);
+ sb[3] ^= ((u32)sbox[(byte)(sa[2] >> (3 * 8)) * 4]) << (3 * 8);
+ sa[2] = rk[r][2] ^ sb[2];
+
+ sb[3] ^= ((u32)sbox[(byte)(sa[3] >> (0 * 8)) * 4]) << (0 * 8);
+ sa[2] ^= ((u32)sbox[(byte)(sa[3] >> (1 * 8)) * 4]) << (1 * 8);
+ sa[1] ^= ((u32)sbox[(byte)(sa[3] >> (2 * 8)) * 4]) << (2 * 8);
+ sa[0] ^= ((u32)sbox[(byte)(sa[3] >> (3 * 8)) * 4]) << (3 * 8);
+ sa[3] = rk[r][3] ^ sb[3];
+
+ buf_put_le32(b + 0, sa[0]);
+ buf_put_le32(b + 4, sa[1]);
+ buf_put_le32(b + 8, sa[2]);
+ buf_put_le32(b + 12, sa[3]);
+#undef rk
+
+ return (56 + 2*sizeof(int));
+}
+#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/
+
+
+static unsigned int
+do_encrypt (const RIJNDAEL_context *ctx,
+ unsigned char *bx, const unsigned char *ax)
+{
+#ifdef USE_AMD64_ASM
+ return _gcry_aes_amd64_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds,
+ enc_tables.T);
+#elif defined(USE_ARM_ASM)
+ return _gcry_aes_arm_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds,
+ enc_tables.T);
+#else
+ return do_encrypt_fn (ctx, bx, ax);
+#endif /* !USE_ARM_ASM && !USE_AMD64_ASM*/
+}
+
+
+static unsigned int
+rijndael_encrypt (void *context, byte *b, const byte *a)
+{
+ RIJNDAEL_context *ctx = context;
+
+ if (ctx->prefetch_enc_fn)
+ ctx->prefetch_enc_fn();
+
+ return ctx->encrypt_fn (ctx, b, a);
+}
+
+
+/* Bulk encryption of complete blocks in CFB mode. Caller needs to
+ make sure that IV is aligned on an unsigned long boundary. This
+ function is only intended for the bulk encryption feature of
+ cipher.c. */
+static void
+_gcry_aes_cfb_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned int burn_depth = 0;
+ rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
+
+ if (ctx->prefetch_enc_fn)
+ ctx->prefetch_enc_fn();
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* Encrypt the IV. */
+ burn_depth = encrypt_fn (ctx, iv, iv);
+ /* XOR the input with the IV and store input into IV. */
+ cipher_block_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE);
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
+ }
+
+ if (burn_depth)
+ _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+}
+
+
+/* Bulk encryption of complete blocks in CBC mode. Caller needs to
+ make sure that IV is aligned on an unsigned long boundary. This
+ function is only intended for the bulk encryption feature of
+ cipher.c. */
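+/* When CBC_MAC is set, the output pointer is not advanced below, so each
+   ciphertext block overwrites the previous one and only the last block,
+   i.e. the CBC-MAC value, remains in the output buffer.  */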
+static void
+_gcry_aes_cbc_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int cbc_mac)
+{
+ RIJNDAEL_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char *last_iv;
+ unsigned int burn_depth = 0;
+ rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
+
+ if (ctx->prefetch_enc_fn)
+ ctx->prefetch_enc_fn();
+
+ last_iv = iv;
+
+ for ( ;nblocks; nblocks-- )
+ {
+ cipher_block_xor(outbuf, inbuf, last_iv, BLOCKSIZE);
+
+ burn_depth = encrypt_fn (ctx, outbuf, outbuf);
+
+ last_iv = outbuf;
+ inbuf += BLOCKSIZE;
+ if (!cbc_mac)
+ outbuf += BLOCKSIZE;
+ }
+
+ if (last_iv != iv)
+ cipher_block_cpy (iv, last_iv, BLOCKSIZE);
+
+ if (burn_depth)
+ _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+}
+
+
+/* Bulk encryption of complete blocks in CTR mode. Caller needs to
+   make sure that CTR is aligned on a 16 byte boundary when AES-NI is
+   used; otherwise the minimum alignment is that of an u32.  This
+   function is only intended
+ for the bulk encryption feature of cipher.c. CTR is expected to be
+ of size BLOCKSIZE. */
+static void
+_gcry_aes_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned int burn_depth = 0;
+ union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp;
+ rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
+
+ if (ctx->prefetch_enc_fn)
+ ctx->prefetch_enc_fn();
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* Encrypt the counter. */
+ burn_depth = encrypt_fn (ctx, tmp.x1, ctr);
+ /* XOR the input with the encrypted counter and store in output. */
+ cipher_block_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE);
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
+ /* Increment the counter. */
+ cipher_block_add(ctr, 1, BLOCKSIZE);
+ }
+
+ wipememory(&tmp, sizeof(tmp));
+
+ if (burn_depth)
+ _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+}
+
+
+
+#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM)
+/* Decrypt one block. A and B may be the same. */
+static unsigned int
+do_decrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b,
+ const unsigned char *a)
+{
+#define rk (ctx->keyschdec32)
+ int rounds = ctx->rounds;
+ int r;
+ u32 sa[4];
+ u32 sb[4];
+
+ sb[0] = buf_get_le32(a + 0);
+ sb[1] = buf_get_le32(a + 4);
+ sb[2] = buf_get_le32(a + 8);
+ sb[3] = buf_get_le32(a + 12);
+
+ sa[0] = sb[0] ^ rk[rounds][0];
+ sa[1] = sb[1] ^ rk[rounds][1];
+ sa[2] = sb[2] ^ rk[rounds][2];
+ sa[3] = sb[3] ^ rk[rounds][3];
+
+ for (r = rounds - 1; r > 1; r--)
+ {
+ sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
+ sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
+ sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
+ sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
+ sa[0] = rk[r][0] ^ sb[0];
+
+ sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
+ sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
+ sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
+ sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
+ sa[1] = rk[r][1] ^ sb[1];
+
+ sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
+ sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
+ sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
+ sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
+ sa[2] = rk[r][2] ^ sb[2];
+
+ sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
+ sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
+ sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
+ sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
+ sa[3] = rk[r][3] ^ sb[3];
+
+ r--;
+
+ sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
+ sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
+ sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
+ sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
+ sa[0] = rk[r][0] ^ sb[0];
+
+ sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
+ sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
+ sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
+ sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
+ sa[1] = rk[r][1] ^ sb[1];
+
+ sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
+ sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
+ sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
+ sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
+ sa[2] = rk[r][2] ^ sb[2];
+
+ sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
+ sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
+ sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
+ sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
+ sa[3] = rk[r][3] ^ sb[3];
+ }
+
+ sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
+ sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
+ sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
+ sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
+ sa[0] = rk[1][0] ^ sb[0];
+
+ sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
+ sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
+ sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
+ sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
+ sa[1] = rk[1][1] ^ sb[1];
+
+ sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
+ sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
+ sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
+ sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
+ sa[2] = rk[1][2] ^ sb[2];
+
+ sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
+ sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
+ sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
+ sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
+ sa[3] = rk[1][3] ^ sb[3];
+
+ /* Last round is special. */
+ sb[0] = (u32)inv_sbox[(byte)(sa[0] >> (0 * 8))] << (0 * 8);
+ sb[1] = (u32)inv_sbox[(byte)(sa[0] >> (1 * 8))] << (1 * 8);
+ sb[2] = (u32)inv_sbox[(byte)(sa[0] >> (2 * 8))] << (2 * 8);
+ sb[3] = (u32)inv_sbox[(byte)(sa[0] >> (3 * 8))] << (3 * 8);
+ sa[0] = sb[0] ^ rk[0][0];
+
+ sb[1] ^= (u32)inv_sbox[(byte)(sa[1] >> (0 * 8))] << (0 * 8);
+ sb[2] ^= (u32)inv_sbox[(byte)(sa[1] >> (1 * 8))] << (1 * 8);
+ sb[3] ^= (u32)inv_sbox[(byte)(sa[1] >> (2 * 8))] << (2 * 8);
+ sa[0] ^= (u32)inv_sbox[(byte)(sa[1] >> (3 * 8))] << (3 * 8);
+ sa[1] = sb[1] ^ rk[0][1];
+
+ sb[2] ^= (u32)inv_sbox[(byte)(sa[2] >> (0 * 8))] << (0 * 8);
+ sb[3] ^= (u32)inv_sbox[(byte)(sa[2] >> (1 * 8))] << (1 * 8);
+ sa[0] ^= (u32)inv_sbox[(byte)(sa[2] >> (2 * 8))] << (2 * 8);
+ sa[1] ^= (u32)inv_sbox[(byte)(sa[2] >> (3 * 8))] << (3 * 8);
+ sa[2] = sb[2] ^ rk[0][2];
+
+ sb[3] ^= (u32)inv_sbox[(byte)(sa[3] >> (0 * 8))] << (0 * 8);
+ sa[0] ^= (u32)inv_sbox[(byte)(sa[3] >> (1 * 8))] << (1 * 8);
+ sa[1] ^= (u32)inv_sbox[(byte)(sa[3] >> (2 * 8))] << (2 * 8);
+ sa[2] ^= (u32)inv_sbox[(byte)(sa[3] >> (3 * 8))] << (3 * 8);
+ sa[3] = sb[3] ^ rk[0][3];
+
+ buf_put_le32(b + 0, sa[0]);
+ buf_put_le32(b + 4, sa[1]);
+ buf_put_le32(b + 8, sa[2]);
+ buf_put_le32(b + 12, sa[3]);
+#undef rk
+
+ return (56+2*sizeof(int));
+}
+#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/
+
+
+/* Decrypt one block. AX and BX may be the same. */
+static unsigned int
+do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
+ const unsigned char *ax)
+{
+#ifdef USE_AMD64_ASM
+ return _gcry_aes_amd64_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds,
+ dec_tables.T);
+#elif defined(USE_ARM_ASM)
+ return _gcry_aes_arm_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds,
+ dec_tables.T);
+#else
+ return do_decrypt_fn (ctx, bx, ax);
+#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/
+}
+
+
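+/* Prepare the decryption key schedule lazily on first use, presumably so
+   that key setup stays cheap for contexts that only ever encrypt.  */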
+static inline void
+check_decryption_preparation (RIJNDAEL_context *ctx)
+{
+ if ( !ctx->decryption_prepared )
+ {
+ ctx->prepare_decryption ( ctx );
+ ctx->decryption_prepared = 1;
+ }
+}
+
+
+static unsigned int
+rijndael_decrypt (void *context, byte *b, const byte *a)
+{
+ RIJNDAEL_context *ctx = context;
+
+ check_decryption_preparation (ctx);
+
+ if (ctx->prefetch_dec_fn)
+ ctx->prefetch_dec_fn();
+
+ return ctx->decrypt_fn (ctx, b, a);
+}
+
+
+/* Bulk decryption of complete blocks in CFB mode. Caller needs to
+ make sure that IV is aligned on an unsigned long boundary. This
+ function is only intended for the bulk encryption feature of
+ cipher.c. */
+static void
+_gcry_aes_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned int burn_depth = 0;
+ rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
+
+ if (ctx->prefetch_enc_fn)
+ ctx->prefetch_enc_fn();
+
+ for ( ;nblocks; nblocks-- )
+ {
+ burn_depth = encrypt_fn (ctx, iv, iv);
+ cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE);
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
+ }
+
+ if (burn_depth)
+ _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+}
+
+
+/* Bulk decryption of complete blocks in CBC mode. Caller needs to
+ make sure that IV is aligned on an unsigned long boundary. This
+ function is only intended for the bulk encryption feature of
+ cipher.c. */
+static void
+_gcry_aes_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned int burn_depth = 0;
+ unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16;
+ rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn;
+
+ check_decryption_preparation (ctx);
+
+ if (ctx->prefetch_dec_fn)
+ ctx->prefetch_dec_fn();
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* INBUF is needed later and it may be identical to OUTBUF, so store
+ the intermediate result to SAVEBUF. */
+
+ burn_depth = decrypt_fn (ctx, savebuf, inbuf);
+
+ cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE);
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+
+ wipememory(savebuf, sizeof(savebuf));
+
+ if (burn_depth)
+ _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+}
+
+
+
+/* Bulk encryption/decryption of complete blocks in OCB mode. */
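+/* Per block this follows the OCB definition (RFC 7253): the offset is
+   advanced by L_{ntz(i)}, the block cipher is applied between two XORs
+   with the current offset, and the plaintext is XORed into the running
+   checksum kept in u_ctr.ctr.  */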
+static size_t
+_gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned int burn_depth = 0;
+
+ if (encrypt)
+ {
+ union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+ rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
+
+ if (ctx->prefetch_enc_fn)
+ ctx->prefetch_enc_fn();
+
+ for ( ;nblocks; nblocks-- )
+ {
+ u64 i = ++c->u_mode.ocb.data_nblocks;
+ const unsigned char *l = ocb_get_l(c, i);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
+ cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+ burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
+ cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+ cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
+
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+ }
+ else
+ {
+ union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+ rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn;
+
+ check_decryption_preparation (ctx);
+
+ if (ctx->prefetch_dec_fn)
+ ctx->prefetch_dec_fn();
+
+ for ( ;nblocks; nblocks-- )
+ {
+ u64 i = ++c->u_mode.ocb.data_nblocks;
+ const unsigned char *l = ocb_get_l(c, i);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
+ cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+ burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
+ cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
+ cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
+
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+ }
+
+ if (burn_depth)
+ _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+
+ return 0;
+}
+
+
+/* Bulk authentication of complete blocks in OCB mode. */
+static size_t
+_gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ const unsigned char *abuf = abuf_arg;
+ unsigned int burn_depth = 0;
+ union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+ rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
+
+ if (ctx->prefetch_enc_fn)
+ ctx->prefetch_enc_fn();
+
+ for ( ;nblocks; nblocks-- )
+ {
+ u64 i = ++c->u_mode.ocb.aad_nblocks;
+ const unsigned char *l = ocb_get_l(c, i);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE);
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ cipher_block_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf,
+ BLOCKSIZE);
+ burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
+ cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE);
+
+ abuf += BLOCKSIZE;
+ }
+
+ wipememory(&l_tmp, sizeof(l_tmp));
+
+ if (burn_depth)
+ _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+
+ return 0;
+}
+
+
+/* Bulk encryption/decryption of complete blocks in XTS mode. */
+static void
+_gcry_aes_xts_crypt (void *context, unsigned char *tweak,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int encrypt)
+{
+ RIJNDAEL_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned int burn_depth = 0;
+ rijndael_cryptfn_t crypt_fn;
+ u64 tweak_lo, tweak_hi, tweak_next_lo, tweak_next_hi, tmp_lo, tmp_hi, carry;
+
+ if (encrypt)
+ {
+ if (ctx->prefetch_enc_fn)
+ ctx->prefetch_enc_fn();
+
+ crypt_fn = ctx->encrypt_fn;
+ }
+ else
+ {
+ check_decryption_preparation (ctx);
+
+ if (ctx->prefetch_dec_fn)
+ ctx->prefetch_dec_fn();
+
+ crypt_fn = ctx->decrypt_fn;
+ }
+
+ tweak_next_lo = buf_get_le64 (tweak + 0);
+ tweak_next_hi = buf_get_le64 (tweak + 8);
+
+ while (nblocks)
+ {
+ tweak_lo = tweak_next_lo;
+ tweak_hi = tweak_next_hi;
+
+ /* Xor-Encrypt/Decrypt-Xor block. */
+ tmp_lo = buf_get_le64 (inbuf + 0) ^ tweak_lo;
+ tmp_hi = buf_get_le64 (inbuf + 8) ^ tweak_hi;
+
+ buf_put_le64 (outbuf + 0, tmp_lo);
+ buf_put_le64 (outbuf + 8, tmp_hi);
+
+ /* Generate next tweak. */
+ carry = -(tweak_next_hi >> 63) & 0x87;
+ tweak_next_hi = (tweak_next_hi << 1) + (tweak_next_lo >> 63);
+ tweak_next_lo = (tweak_next_lo << 1) ^ carry;
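+      /* The three lines above multiply the 128-bit tweak by x in
+         GF(2^128): shift left by one bit across both 64-bit halves and,
+         if a bit falls out at the top, reduce it with the XTS polynomial
+         x^128 + x^7 + x^2 + x + 1 (hence the 0x87 constant).  */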
+
+ burn_depth = crypt_fn (ctx, outbuf, outbuf);
+
+ buf_put_le64 (outbuf + 0, buf_get_le64 (outbuf + 0) ^ tweak_lo);
+ buf_put_le64 (outbuf + 8, buf_get_le64 (outbuf + 8) ^ tweak_hi);
+
+ outbuf += GCRY_XTS_BLOCK_LEN;
+ inbuf += GCRY_XTS_BLOCK_LEN;
+ nblocks--;
+ }
+
+ buf_put_le64 (tweak + 0, tweak_next_lo);
+ buf_put_le64 (tweak + 8, tweak_next_hi);
+
+ if (burn_depth)
+ _gcry_burn_stack (burn_depth + 5 * sizeof(void *));
+}
+
+
+/* Run the self-tests for AES 128. Returns NULL on success. */
+static const char*
+selftest_basic_128 (void)
+{
+ RIJNDAEL_context *ctx;
+ unsigned char *ctxmem;
+ unsigned char scratch[16];
+ cipher_bulk_ops_t bulk_ops;
+
+ /* The test vectors are from the AES supplied ones; more or less
+ randomly taken from ecb_tbl.txt (I=42,81,14) */
+#if 1
+ static const unsigned char plaintext_128[16] =
+ {
+ 0x01,0x4B,0xAF,0x22,0x78,0xA6,0x9D,0x33,
+ 0x1D,0x51,0x80,0x10,0x36,0x43,0xE9,0x9A
+ };
+ static const unsigned char key_128[16] =
+ {
+ 0xE8,0xE9,0xEA,0xEB,0xED,0xEE,0xEF,0xF0,
+ 0xF2,0xF3,0xF4,0xF5,0xF7,0xF8,0xF9,0xFA
+ };
+ static const unsigned char ciphertext_128[16] =
+ {
+ 0x67,0x43,0xC3,0xD1,0x51,0x9A,0xB4,0xF2,
+ 0xCD,0x9A,0x78,0xAB,0x09,0xA5,0x11,0xBD
+ };
+#else
+ /* Test vectors from fips-197, appendix C. */
+# warning debug test vectors in use
+ static const unsigned char plaintext_128[16] =
+ {
+ 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,
+ 0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff
+ };
+ static const unsigned char key_128[16] =
+ {
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
+ 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
+ /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */
+ /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */
+ };
+ static const unsigned char ciphertext_128[16] =
+ {
+ 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30,
+ 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a
+ };
+#endif
+
+ /* Because gcc/ld can only align the CTX struct on 8 bytes on the
+ stack, we need to allocate that context on the heap. */
+ ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem);
+ if (!ctx)
+ return "failed to allocate memory";
+
+ rijndael_setkey (ctx, key_128, sizeof (key_128), &bulk_ops);
+ rijndael_encrypt (ctx, scratch, plaintext_128);
+ if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128)))
+ {
+ xfree (ctxmem);
+ return "AES-128 test encryption failed.";
+ }
+ rijndael_decrypt (ctx, scratch, scratch);
+ xfree (ctxmem);
+ if (memcmp (scratch, plaintext_128, sizeof (plaintext_128)))
+ return "AES-128 test decryption failed.";
+
+ return NULL;
+}
+
+/* Run the self-tests for AES 192. Returns NULL on success. */
+static const char*
+selftest_basic_192 (void)
+{
+ RIJNDAEL_context *ctx;
+ unsigned char *ctxmem;
+ unsigned char scratch[16];
+ cipher_bulk_ops_t bulk_ops;
+
+ static unsigned char plaintext_192[16] =
+ {
+ 0x76,0x77,0x74,0x75,0xF1,0xF2,0xF3,0xF4,
+ 0xF8,0xF9,0xE6,0xE7,0x77,0x70,0x71,0x72
+ };
+ static unsigned char key_192[24] =
+ {
+ 0x04,0x05,0x06,0x07,0x09,0x0A,0x0B,0x0C,
+ 0x0E,0x0F,0x10,0x11,0x13,0x14,0x15,0x16,
+ 0x18,0x19,0x1A,0x1B,0x1D,0x1E,0x1F,0x20
+ };
+ static const unsigned char ciphertext_192[16] =
+ {
+ 0x5D,0x1E,0xF2,0x0D,0xCE,0xD6,0xBC,0xBC,
+ 0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA
+ };
+
+ ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem);
+ if (!ctx)
+ return "failed to allocate memory";
+ rijndael_setkey (ctx, key_192, sizeof(key_192), &bulk_ops);
+ rijndael_encrypt (ctx, scratch, plaintext_192);
+ if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192)))
+ {
+ xfree (ctxmem);
+ return "AES-192 test encryption failed.";
+ }
+ rijndael_decrypt (ctx, scratch, scratch);
+ xfree (ctxmem);
+ if (memcmp (scratch, plaintext_192, sizeof (plaintext_192)))
+ return "AES-192 test decryption failed.";
+
+ return NULL;
+}
+
+
+/* Run the self-tests for AES 256. Returns NULL on success. */
+static const char*
+selftest_basic_256 (void)
+{
+ RIJNDAEL_context *ctx;
+ unsigned char *ctxmem;
+ unsigned char scratch[16];
+ cipher_bulk_ops_t bulk_ops;
+
+ static unsigned char plaintext_256[16] =
+ {
+ 0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
+ 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21
+ };
+ static unsigned char key_256[32] =
+ {
+ 0x08,0x09,0x0A,0x0B,0x0D,0x0E,0x0F,0x10,
+ 0x12,0x13,0x14,0x15,0x17,0x18,0x19,0x1A,
+ 0x1C,0x1D,0x1E,0x1F,0x21,0x22,0x23,0x24,
+ 0x26,0x27,0x28,0x29,0x2B,0x2C,0x2D,0x2E
+ };
+ static const unsigned char ciphertext_256[16] =
+ {
+ 0x08,0x0E,0x95,0x17,0xEB,0x16,0x77,0x71,
+ 0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3
+ };
+
+ ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem);
+ if (!ctx)
+ return "failed to allocate memory";
+ rijndael_setkey (ctx, key_256, sizeof(key_256), &bulk_ops);
+ rijndael_encrypt (ctx, scratch, plaintext_256);
+ if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256)))
+ {
+ xfree (ctxmem);
+ return "AES-256 test encryption failed.";
+ }
+ rijndael_decrypt (ctx, scratch, scratch);
+ xfree (ctxmem);
+ if (memcmp (scratch, plaintext_256, sizeof (plaintext_256)))
+ return "AES-256 test decryption failed.";
+
+ return NULL;
+}
+
+
+/* Run the self-tests for AES-CTR-128; this tests the IV increment of
+   bulk CTR encryption.  Returns NULL on success. */
+static const char*
+selftest_ctr_128 (void)
+{
+ const int nblocks = 8+1;
+ const int blocksize = BLOCKSIZE;
+ const int context_size = sizeof(RIJNDAEL_context);
+
+ return _gcry_selftest_helper_ctr("AES", &rijndael_setkey,
+ &rijndael_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for AES-CBC-128; this tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cbc_128 (void)
+{
+ const int nblocks = 8+2;
+ const int blocksize = BLOCKSIZE;
+ const int context_size = sizeof(RIJNDAEL_context);
+
+ return _gcry_selftest_helper_cbc("AES", &rijndael_setkey,
+ &rijndael_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for AES-CFB-128; this tests bulk CFB decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cfb_128 (void)
+{
+ const int nblocks = 8+2;
+ const int blocksize = BLOCKSIZE;
+ const int context_size = sizeof(RIJNDAEL_context);
+
+ return _gcry_selftest_helper_cfb("AES", &rijndael_setkey,
+ &rijndael_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run all the self-tests and return NULL on success. This function
+ is used for the on-the-fly self-tests. */
+static const char *
+selftest (void)
+{
+ const char *r;
+
+ if ( (r = selftest_basic_128 ())
+ || (r = selftest_basic_192 ())
+ || (r = selftest_basic_256 ()) )
+ return r;
+
+ if ( (r = selftest_ctr_128 ()) )
+ return r;
+
+ if ( (r = selftest_cbc_128 ()) )
+ return r;
+
+ if ( (r = selftest_cfb_128 ()) )
+ return r;
+
+ return r;
+}
+
+
+/* Self-tests for AES-128 based on the test vectors of NIST SP 800-38A. */
+static const char *
+selftest_fips_128_38a (int requested_mode)
+{
+ static const struct tv
+ {
+ int mode;
+ const unsigned char key[16];
+ const unsigned char iv[16];
+ struct
+ {
+ const unsigned char input[16];
+ const unsigned char output[16];
+ } data[4];
+ } tv[2] =
+ {
+ {
+ GCRY_CIPHER_MODE_CFB, /* F.3.13, CFB128-AES128 */
+ { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
+ 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ {
+ { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96,
+ 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a },
+ { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20,
+ 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } },
+
+ { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c,
+ 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 },
+ { 0xc8, 0xa6, 0x45, 0x37, 0xa0, 0xb3, 0xa9, 0x3f,
+ 0xcd, 0xe3, 0xcd, 0xad, 0x9f, 0x1c, 0xe5, 0x8b } },
+
+ { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11,
+ 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef },
+ { 0x26, 0x75, 0x1f, 0x67, 0xa3, 0xcb, 0xb1, 0x40,
+ 0xb1, 0x80, 0x8c, 0xf1, 0x87, 0xa4, 0xf4, 0xdf } },
+
+ { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17,
+ 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 },
+ { 0xc0, 0x4b, 0x05, 0x35, 0x7c, 0x5d, 0x1c, 0x0e,
+ 0xea, 0xc4, 0xc6, 0x6f, 0x9f, 0xf7, 0xf2, 0xe6 } }
+ }
+ },
+ {
+ GCRY_CIPHER_MODE_OFB,
+ { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
+ 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ {
+ { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96,
+ 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a },
+ { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20,
+ 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } },
+
+ { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c,
+ 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 },
+ { 0x77, 0x89, 0x50, 0x8d, 0x16, 0x91, 0x8f, 0x03,
+ 0xf5, 0x3c, 0x52, 0xda, 0xc5, 0x4e, 0xd8, 0x25 } },
+
+ { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11,
+ 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef },
+ { 0x97, 0x40, 0x05, 0x1e, 0x9c, 0x5f, 0xec, 0xf6,
+ 0x43, 0x44, 0xf7, 0xa8, 0x22, 0x60, 0xed, 0xcc } },
+
+ { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17,
+ 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 },
+ { 0x30, 0x4c, 0x65, 0x28, 0xf6, 0x59, 0xc7, 0x78,
+ 0x66, 0xa5, 0x10, 0xd9, 0xc1, 0xd6, 0xae, 0x5e } },
+ }
+ }
+ };
+ unsigned char scratch[16];
+ gpg_error_t err;
+ int tvi, idx;
+ gcry_cipher_hd_t hdenc = NULL;
+ gcry_cipher_hd_t hddec = NULL;
+
+#define Fail(a) do { \
+ _gcry_cipher_close (hdenc); \
+ _gcry_cipher_close (hddec); \
+ return a; \
+ } while (0)
+
+ gcry_assert (sizeof tv[0].data[0].input == sizeof scratch);
+ gcry_assert (sizeof tv[0].data[0].output == sizeof scratch);
+
+ for (tvi=0; tvi < DIM (tv); tvi++)
+ if (tv[tvi].mode == requested_mode)
+ break;
+ if (tvi == DIM (tv))
+ Fail ("no test data for this mode");
+
+ err = _gcry_cipher_open (&hdenc, GCRY_CIPHER_AES, tv[tvi].mode, 0);
+ if (err)
+ Fail ("open");
+ err = _gcry_cipher_open (&hddec, GCRY_CIPHER_AES, tv[tvi].mode, 0);
+ if (err)
+ Fail ("open");
+ err = _gcry_cipher_setkey (hdenc, tv[tvi].key, sizeof tv[tvi].key);
+ if (!err)
+ err = _gcry_cipher_setkey (hddec, tv[tvi].key, sizeof tv[tvi].key);
+ if (err)
+ Fail ("set key");
+ err = _gcry_cipher_setiv (hdenc, tv[tvi].iv, sizeof tv[tvi].iv);
+ if (!err)
+ err = _gcry_cipher_setiv (hddec, tv[tvi].iv, sizeof tv[tvi].iv);
+ if (err)
+ Fail ("set IV");
+ for (idx=0; idx < DIM (tv[tvi].data); idx++)
+ {
+ err = _gcry_cipher_encrypt (hdenc, scratch, sizeof scratch,
+ tv[tvi].data[idx].input,
+ sizeof tv[tvi].data[idx].input);
+ if (err)
+ Fail ("encrypt command");
+ if (memcmp (scratch, tv[tvi].data[idx].output, sizeof scratch))
+ Fail ("encrypt mismatch");
+ err = _gcry_cipher_decrypt (hddec, scratch, sizeof scratch,
+ tv[tvi].data[idx].output,
+ sizeof tv[tvi].data[idx].output);
+ if (err)
+ Fail ("decrypt command");
+ if (memcmp (scratch, tv[tvi].data[idx].input, sizeof scratch))
+ Fail ("decrypt mismatch");
+ }
+
+#undef Fail
+ _gcry_cipher_close (hdenc);
+ _gcry_cipher_close (hddec);
+ return NULL;
+}
+
+
+/* Complete selftest for AES-128 with all modes and driver code. */
+static gpg_err_code_t
+selftest_fips_128 (int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+
+ what = "low-level";
+ errtxt = selftest_basic_128 ();
+ if (errtxt)
+ goto failed;
+
+ if (extended)
+ {
+ what = "cfb";
+ errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_CFB);
+ if (errtxt)
+ goto failed;
+
+ what = "ofb";
+ errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_OFB);
+ if (errtxt)
+ goto failed;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("cipher", GCRY_CIPHER_AES128, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+/* Complete selftest for AES-192. */
+static gpg_err_code_t
+selftest_fips_192 (int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+
+ (void)extended; /* No extended tests available. */
+
+ what = "low-level";
+ errtxt = selftest_basic_192 ();
+ if (errtxt)
+ goto failed;
+
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("cipher", GCRY_CIPHER_AES192, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Complete selftest for AES-256. */
+static gpg_err_code_t
+selftest_fips_256 (int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+
+ (void)extended; /* No extended tests available. */
+
+ what = "low-level";
+ errtxt = selftest_basic_256 ();
+ if (errtxt)
+ goto failed;
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("cipher", GCRY_CIPHER_AES256, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+
+/* Run a full self-test for ALGO and return 0 on success. */
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+ gpg_err_code_t ec;
+
+ switch (algo)
+ {
+ case GCRY_CIPHER_AES128:
+ ec = selftest_fips_128 (extended, report);
+ break;
+ case GCRY_CIPHER_AES192:
+ ec = selftest_fips_192 (extended, report);
+ break;
+ case GCRY_CIPHER_AES256:
+ ec = selftest_fips_256 (extended, report);
+ break;
+ default:
+ ec = GPG_ERR_CIPHER_ALGO;
+ break;
+
+ }
+ return ec;
+}
+
+
+
+
+static const char *rijndael_names[] =
+ {
+ "RIJNDAEL",
+ "AES128",
+ "AES-128",
+ NULL
+ };
+
+static gcry_cipher_oid_spec_t rijndael_oids[] =
+ {
+ { "2.16.840.1.101.3.4.1.1", GCRY_CIPHER_MODE_ECB },
+ { "2.16.840.1.101.3.4.1.2", GCRY_CIPHER_MODE_CBC },
+ { "2.16.840.1.101.3.4.1.3", GCRY_CIPHER_MODE_OFB },
+ { "2.16.840.1.101.3.4.1.4", GCRY_CIPHER_MODE_CFB },
+ { NULL }
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_aes =
+ {
+ GCRY_CIPHER_AES, {0, 1},
+ "AES", rijndael_names, rijndael_oids, 16, 128,
+ sizeof (RIJNDAEL_context),
+ rijndael_setkey, rijndael_encrypt, rijndael_decrypt,
+ NULL, NULL,
+ run_selftests
+ };
+
+
+static const char *rijndael192_names[] =
+ {
+ "RIJNDAEL192",
+ "AES-192",
+ NULL
+ };
+
+static gcry_cipher_oid_spec_t rijndael192_oids[] =
+ {
+ { "2.16.840.1.101.3.4.1.21", GCRY_CIPHER_MODE_ECB },
+ { "2.16.840.1.101.3.4.1.22", GCRY_CIPHER_MODE_CBC },
+ { "2.16.840.1.101.3.4.1.23", GCRY_CIPHER_MODE_OFB },
+ { "2.16.840.1.101.3.4.1.24", GCRY_CIPHER_MODE_CFB },
+ { NULL }
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_aes192 =
+ {
+ GCRY_CIPHER_AES192, {0, 1},
+ "AES192", rijndael192_names, rijndael192_oids, 16, 192,
+ sizeof (RIJNDAEL_context),
+ rijndael_setkey, rijndael_encrypt, rijndael_decrypt,
+ NULL, NULL,
+ run_selftests
+ };
+
+
+static const char *rijndael256_names[] =
+ {
+ "RIJNDAEL256",
+ "AES-256",
+ NULL
+ };
+
+static gcry_cipher_oid_spec_t rijndael256_oids[] =
+ {
+ { "2.16.840.1.101.3.4.1.41", GCRY_CIPHER_MODE_ECB },
+ { "2.16.840.1.101.3.4.1.42", GCRY_CIPHER_MODE_CBC },
+ { "2.16.840.1.101.3.4.1.43", GCRY_CIPHER_MODE_OFB },
+ { "2.16.840.1.101.3.4.1.44", GCRY_CIPHER_MODE_CFB },
+ { NULL }
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_aes256 =
+ {
+ GCRY_CIPHER_AES256, {0, 1},
+ "AES256", rijndael256_names, rijndael256_oids, 16, 256,
+ sizeof (RIJNDAEL_context),
+ rijndael_setkey, rijndael_encrypt, rijndael_decrypt,
+ NULL, NULL,
+ run_selftests
+ };
diff --git a/comm/third_party/libgcrypt/cipher/rmd160.c b/comm/third_party/libgcrypt/cipher/rmd160.c
new file mode 100644
index 0000000000..e12ff0176f
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rmd160.c
@@ -0,0 +1,529 @@
+/* rmd160.c - RIPE-MD160
+ * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "hash-common.h"
+#include "cipher.h" /* Only used for the rmd160_hash_buffer() prototype. */
+
+#include "bithelp.h"
+#include "bufhelp.h"
+
+/*********************************
+ * RIPEMD-160 is not patented, see (as of 25.10.97)
+ * http://www.esat.kuleuven.ac.be/~bosselae/ripemd160.html
+ * Note that the code uses Little Endian byteorder, which is good for
+ * 386 etc, but we must add some conversion when used on a big endian box.
+ *
+ *
+ * Pseudo-code for RIPEMD-160
+ *
+ * RIPEMD-160 is an iterative hash function that operates on 32-bit words.
+ * The round function takes as input a 5-word chaining variable and a 16-word
+ * message block and maps this to a new chaining variable. All operations are
+ * defined on 32-bit words. Padding is identical to that of MD4.
+ *
+ *
+ * RIPEMD-160: definitions
+ *
+ *
+ * nonlinear functions at bit level: exor, mux, -, mux, -
+ *
+ * f(j, x, y, z) = x XOR y XOR z (0 <= j <= 15)
+ * f(j, x, y, z) = (x AND y) OR (NOT(x) AND z) (16 <= j <= 31)
+ * f(j, x, y, z) = (x OR NOT(y)) XOR z (32 <= j <= 47)
+ * f(j, x, y, z) = (x AND z) OR (y AND NOT(z)) (48 <= j <= 63)
+ * f(j, x, y, z) = x XOR (y OR NOT(z)) (64 <= j <= 79)
+ *
+ *
+ * added constants (hexadecimal)
+ *
+ * K(j) = 0x00000000 (0 <= j <= 15)
+ * K(j) = 0x5A827999 (16 <= j <= 31) int(2**30 x sqrt(2))
+ * K(j) = 0x6ED9EBA1 (32 <= j <= 47) int(2**30 x sqrt(3))
+ * K(j) = 0x8F1BBCDC (48 <= j <= 63) int(2**30 x sqrt(5))
+ * K(j) = 0xA953FD4E (64 <= j <= 79) int(2**30 x sqrt(7))
+ * K'(j) = 0x50A28BE6 (0 <= j <= 15) int(2**30 x cbrt(2))
+ * K'(j) = 0x5C4DD124 (16 <= j <= 31) int(2**30 x cbrt(3))
+ * K'(j) = 0x6D703EF3 (32 <= j <= 47) int(2**30 x cbrt(5))
+ * K'(j) = 0x7A6D76E9 (48 <= j <= 63) int(2**30 x cbrt(7))
+ * K'(j) = 0x00000000 (64 <= j <= 79)
+ *
+ *
+ * selection of message word
+ *
+ * r(j) = j (0 <= j <= 15)
+ * r(16..31) = 7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8
+ * r(32..47) = 3, 10, 14, 4, 9, 15, 8, 1, 2, 7, 0, 6, 13, 11, 5, 12
+ * r(48..63) = 1, 9, 11, 10, 0, 8, 12, 4, 13, 3, 7, 15, 14, 5, 6, 2
+ * r(64..79) = 4, 0, 5, 9, 7, 12, 2, 10, 14, 1, 3, 8, 11, 6, 15, 13
+ * r'(0..15) = 5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12
+ * r'(16..31)= 6, 11, 3, 7, 0, 13, 5, 10, 14, 15, 8, 12, 4, 9, 1, 2
+ * r'(32..47)= 15, 5, 1, 3, 7, 14, 6, 9, 11, 8, 12, 2, 10, 0, 4, 13
+ * r'(48..63)= 8, 6, 4, 1, 3, 11, 15, 0, 5, 12, 2, 13, 9, 7, 10, 14
+ * r'(64..79)= 12, 15, 10, 4, 1, 5, 8, 7, 6, 2, 13, 14, 0, 3, 9, 11
+ *
+ *
+ * amount for rotate left (rol)
+ *
+ * s(0..15) = 11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8
+ * s(16..31) = 7, 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12
+ * s(32..47) = 11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5
+ * s(48..63) = 11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12
+ * s(64..79) = 9, 15, 5, 11, 6, 8, 13, 12, 5, 12, 13, 14, 11, 8, 5, 6
+ * s'(0..15) = 8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6
+ * s'(16..31)= 9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11
+ * s'(32..47)= 9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5
+ * s'(48..63)= 15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8
+ * s'(64..79)= 8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11
+ *
+ *
+ * initial value (hexadecimal)
+ *
+ * h0 = 0x67452301; h1 = 0xEFCDAB89; h2 = 0x98BADCFE; h3 = 0x10325476;
+ * h4 = 0xC3D2E1F0;
+ *
+ *
+ * RIPEMD-160: pseudo-code
+ *
+ * It is assumed that the message after padding consists of t 16-word blocks
+ * that will be denoted with X[i][j], with 0 <= i <= t-1 and 0 <= j <= 15.
+ * The symbol [+] denotes addition modulo 2**32 and rol_s denotes cyclic left
+ * shift (rotate) over s positions.
+ *
+ *
+ * for i := 0 to t-1 {
+ *    A := h0; B := h1; C := h2; D := h3; E := h4;
+ *    A' := h0; B' := h1; C' := h2; D' := h3; E' := h4;
+ * for j := 0 to 79 {
+ * T := rol_s(j)(A [+] f(j, B, C, D) [+] X[i][r(j)] [+] K(j)) [+] E;
+ * A := E; E := D; D := rol_10(C); C := B; B := T;
+ * T := rol_s'(j)(A' [+] f(79-j, B', C', D') [+] X[i][r'(j)]
+ [+] K'(j)) [+] E';
+ * A' := E'; E' := D'; D' := rol_10(C'); C' := B'; B' := T;
+ * }
+ * T := h1 [+] C [+] D'; h1 := h2 [+] D [+] E'; h2 := h3 [+] E [+] A';
+ * h3 := h4 [+] A [+] B'; h4 := h0 [+] B [+] C'; h0 := T;
+ * }
+ */
+
+/* Some examples:
+ * "" 9c1185a5c5e9fc54612808977ee8f548b2258d31
+ * "a" 0bdc9d2d256b3ee9daae347be6f4dc835a467ffe
+ * "abc" 8eb208f7e05d987a9b044a8e98c6b087f15a0bfc
+ * "message digest" 5d0689ef49d2fae572b881b123a85ffa21595f36
+ * "a...z" f71c27109c692c1b56bbdceb5b9d2865b3708dbc
+ * "abcdbcde...nopq" 12a053384a9c0c88e405a06c27dcf49ada62eb2b
+ * "A...Za...z0...9" b0e20b6e3116640286ed3a87a5713079b21f5189
+ * 8 times "1234567890" 9b752e45573d4b39f4dbd3323cab82bf63326bfb
+ * 1 million times "a" 52783243c1697bdbe16d37f97f68f08325dc1528
+ */
+
+typedef struct
+{
+ gcry_md_block_ctx_t bctx;
+ u32 h0,h1,h2,h3,h4;
+} RMD160_CONTEXT;
+
+
+static unsigned int
+transform ( void *ctx, const unsigned char *data, size_t nblks );
+
+static void
+rmd160_init (void *context, unsigned int flags)
+{
+ RMD160_CONTEXT *hd = context;
+
+ (void)flags;
+
+ hd->h0 = 0x67452301;
+ hd->h1 = 0xEFCDAB89;
+ hd->h2 = 0x98BADCFE;
+ hd->h3 = 0x10325476;
+ hd->h4 = 0xC3D2E1F0;
+
+ hd->bctx.nblocks = 0;
+ hd->bctx.nblocks_high = 0;
+ hd->bctx.count = 0;
+ hd->bctx.blocksize_shift = _gcry_ctz(64);
+ hd->bctx.bwrite = transform;
+}
+
+
+/****************
+ * Transform the message X, which consists of 16 32-bit words.
+ */
+static unsigned int
+transform_blk ( void *ctx, const unsigned char *data )
+{
+ RMD160_CONTEXT *hd = ctx;
+ register u32 al, ar, bl, br, cl, cr, dl, dr, el, er;
+ u32 x[16];
+ int i;
+
+ for ( i = 0; i < 16; i++ )
+ x[i] = buf_get_le32(data + i * 4);
+
+#define K0 0x00000000
+#define K1 0x5A827999
+#define K2 0x6ED9EBA1
+#define K3 0x8F1BBCDC
+#define K4 0xA953FD4E
+#define KK0 0x50A28BE6
+#define KK1 0x5C4DD124
+#define KK2 0x6D703EF3
+#define KK3 0x7A6D76E9
+#define KK4 0x00000000
+#define F0(x,y,z) ( (x) ^ (y) ^ (z) )
+#define F1(x,y,z) ( ((x) & (y)) | (~(x) & (z)) )
+#define F2(x,y,z) ( ((x) | ~(y)) ^ (z) )
+#define F3(x,y,z) ( ((x) & (z)) | ((y) & ~(z)) )
+#define F4(x,y,z) ( (x) ^ ((y) | ~(z)) )
+#define R(a,b,c,d,e,f,k,r,s) do { a += f(b,c,d) + k + x[r]; \
+ a = rol(a,s) + e; \
+ c = rol(c,10); \
+ } while(0)
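+/* R() performs one round step of the pseudo-code in the file header:
+   T := rol_s(a + f(b,c,d) + X[r] + K) + e  followed by  c := rol_10(c);
+   instead of shuffling the five state words, the calls below rotate the
+   argument order from one invocation to the next.  */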
+
+ /* left lane and right lanes interleaved */
+ al = ar = hd->h0;
+ bl = br = hd->h1;
+ cl = cr = hd->h2;
+ dl = dr = hd->h3;
+ el = er = hd->h4;
+ R( al, bl, cl, dl, el, F0, K0, 0, 11 );
+ R( ar, br, cr, dr, er, F4, KK0, 5, 8);
+ R( el, al, bl, cl, dl, F0, K0, 1, 14 );
+ R( er, ar, br, cr, dr, F4, KK0, 14, 9);
+ R( dl, el, al, bl, cl, F0, K0, 2, 15 );
+ R( dr, er, ar, br, cr, F4, KK0, 7, 9);
+ R( cl, dl, el, al, bl, F0, K0, 3, 12 );
+ R( cr, dr, er, ar, br, F4, KK0, 0, 11);
+ R( bl, cl, dl, el, al, F0, K0, 4, 5 );
+ R( br, cr, dr, er, ar, F4, KK0, 9, 13);
+ R( al, bl, cl, dl, el, F0, K0, 5, 8 );
+ R( ar, br, cr, dr, er, F4, KK0, 2, 15);
+ R( el, al, bl, cl, dl, F0, K0, 6, 7 );
+ R( er, ar, br, cr, dr, F4, KK0, 11, 15);
+ R( dl, el, al, bl, cl, F0, K0, 7, 9 );
+ R( dr, er, ar, br, cr, F4, KK0, 4, 5);
+ R( cl, dl, el, al, bl, F0, K0, 8, 11 );
+ R( cr, dr, er, ar, br, F4, KK0, 13, 7);
+ R( bl, cl, dl, el, al, F0, K0, 9, 13 );
+ R( br, cr, dr, er, ar, F4, KK0, 6, 7);
+ R( al, bl, cl, dl, el, F0, K0, 10, 14 );
+ R( ar, br, cr, dr, er, F4, KK0, 15, 8);
+ R( el, al, bl, cl, dl, F0, K0, 11, 15 );
+ R( er, ar, br, cr, dr, F4, KK0, 8, 11);
+ R( dl, el, al, bl, cl, F0, K0, 12, 6 );
+ R( dr, er, ar, br, cr, F4, KK0, 1, 14);
+ R( cl, dl, el, al, bl, F0, K0, 13, 7 );
+ R( cr, dr, er, ar, br, F4, KK0, 10, 14);
+ R( bl, cl, dl, el, al, F0, K0, 14, 9 );
+ R( br, cr, dr, er, ar, F4, KK0, 3, 12);
+ R( al, bl, cl, dl, el, F0, K0, 15, 8 );
+ R( ar, br, cr, dr, er, F4, KK0, 12, 6);
+ R( el, al, bl, cl, dl, F1, K1, 7, 7 );
+ R( er, ar, br, cr, dr, F3, KK1, 6, 9);
+ R( dl, el, al, bl, cl, F1, K1, 4, 6 );
+ R( dr, er, ar, br, cr, F3, KK1, 11, 13);
+ R( cl, dl, el, al, bl, F1, K1, 13, 8 );
+ R( cr, dr, er, ar, br, F3, KK1, 3, 15);
+ R( bl, cl, dl, el, al, F1, K1, 1, 13 );
+ R( br, cr, dr, er, ar, F3, KK1, 7, 7);
+ R( al, bl, cl, dl, el, F1, K1, 10, 11 );
+ R( ar, br, cr, dr, er, F3, KK1, 0, 12);
+ R( el, al, bl, cl, dl, F1, K1, 6, 9 );
+ R( er, ar, br, cr, dr, F3, KK1, 13, 8);
+ R( dl, el, al, bl, cl, F1, K1, 15, 7 );
+ R( dr, er, ar, br, cr, F3, KK1, 5, 9);
+ R( cl, dl, el, al, bl, F1, K1, 3, 15 );
+ R( cr, dr, er, ar, br, F3, KK1, 10, 11);
+ R( bl, cl, dl, el, al, F1, K1, 12, 7 );
+ R( br, cr, dr, er, ar, F3, KK1, 14, 7);
+ R( al, bl, cl, dl, el, F1, K1, 0, 12 );
+ R( ar, br, cr, dr, er, F3, KK1, 15, 7);
+ R( el, al, bl, cl, dl, F1, K1, 9, 15 );
+ R( er, ar, br, cr, dr, F3, KK1, 8, 12);
+ R( dl, el, al, bl, cl, F1, K1, 5, 9 );
+ R( dr, er, ar, br, cr, F3, KK1, 12, 7);
+ R( cl, dl, el, al, bl, F1, K1, 2, 11 );
+ R( cr, dr, er, ar, br, F3, KK1, 4, 6);
+ R( bl, cl, dl, el, al, F1, K1, 14, 7 );
+ R( br, cr, dr, er, ar, F3, KK1, 9, 15);
+ R( al, bl, cl, dl, el, F1, K1, 11, 13 );
+ R( ar, br, cr, dr, er, F3, KK1, 1, 13);
+ R( el, al, bl, cl, dl, F1, K1, 8, 12 );
+ R( er, ar, br, cr, dr, F3, KK1, 2, 11);
+ R( dl, el, al, bl, cl, F2, K2, 3, 11 );
+ R( dr, er, ar, br, cr, F2, KK2, 15, 9);
+ R( cl, dl, el, al, bl, F2, K2, 10, 13 );
+ R( cr, dr, er, ar, br, F2, KK2, 5, 7);
+ R( bl, cl, dl, el, al, F2, K2, 14, 6 );
+ R( br, cr, dr, er, ar, F2, KK2, 1, 15);
+ R( al, bl, cl, dl, el, F2, K2, 4, 7 );
+ R( ar, br, cr, dr, er, F2, KK2, 3, 11);
+ R( el, al, bl, cl, dl, F2, K2, 9, 14 );
+ R( er, ar, br, cr, dr, F2, KK2, 7, 8);
+ R( dl, el, al, bl, cl, F2, K2, 15, 9 );
+ R( dr, er, ar, br, cr, F2, KK2, 14, 6);
+ R( cl, dl, el, al, bl, F2, K2, 8, 13 );
+ R( cr, dr, er, ar, br, F2, KK2, 6, 6);
+ R( bl, cl, dl, el, al, F2, K2, 1, 15 );
+ R( br, cr, dr, er, ar, F2, KK2, 9, 14);
+ R( al, bl, cl, dl, el, F2, K2, 2, 14 );
+ R( ar, br, cr, dr, er, F2, KK2, 11, 12);
+ R( el, al, bl, cl, dl, F2, K2, 7, 8 );
+ R( er, ar, br, cr, dr, F2, KK2, 8, 13);
+ R( dl, el, al, bl, cl, F2, K2, 0, 13 );
+ R( dr, er, ar, br, cr, F2, KK2, 12, 5);
+ R( cl, dl, el, al, bl, F2, K2, 6, 6 );
+ R( cr, dr, er, ar, br, F2, KK2, 2, 14);
+ R( bl, cl, dl, el, al, F2, K2, 13, 5 );
+ R( br, cr, dr, er, ar, F2, KK2, 10, 13);
+ R( al, bl, cl, dl, el, F2, K2, 11, 12 );
+ R( ar, br, cr, dr, er, F2, KK2, 0, 13);
+ R( el, al, bl, cl, dl, F2, K2, 5, 7 );
+ R( er, ar, br, cr, dr, F2, KK2, 4, 7);
+ R( dl, el, al, bl, cl, F2, K2, 12, 5 );
+ R( dr, er, ar, br, cr, F2, KK2, 13, 5);
+ R( cl, dl, el, al, bl, F3, K3, 1, 11 );
+ R( cr, dr, er, ar, br, F1, KK3, 8, 15);
+ R( bl, cl, dl, el, al, F3, K3, 9, 12 );
+ R( br, cr, dr, er, ar, F1, KK3, 6, 5);
+ R( al, bl, cl, dl, el, F3, K3, 11, 14 );
+ R( ar, br, cr, dr, er, F1, KK3, 4, 8);
+ R( el, al, bl, cl, dl, F3, K3, 10, 15 );
+ R( er, ar, br, cr, dr, F1, KK3, 1, 11);
+ R( dl, el, al, bl, cl, F3, K3, 0, 14 );
+ R( dr, er, ar, br, cr, F1, KK3, 3, 14);
+ R( cl, dl, el, al, bl, F3, K3, 8, 15 );
+ R( cr, dr, er, ar, br, F1, KK3, 11, 14);
+ R( bl, cl, dl, el, al, F3, K3, 12, 9 );
+ R( br, cr, dr, er, ar, F1, KK3, 15, 6);
+ R( al, bl, cl, dl, el, F3, K3, 4, 8 );
+ R( ar, br, cr, dr, er, F1, KK3, 0, 14);
+ R( el, al, bl, cl, dl, F3, K3, 13, 9 );
+ R( er, ar, br, cr, dr, F1, KK3, 5, 6);
+ R( dl, el, al, bl, cl, F3, K3, 3, 14 );
+ R( dr, er, ar, br, cr, F1, KK3, 12, 9);
+ R( cl, dl, el, al, bl, F3, K3, 7, 5 );
+ R( cr, dr, er, ar, br, F1, KK3, 2, 12);
+ R( bl, cl, dl, el, al, F3, K3, 15, 6 );
+ R( br, cr, dr, er, ar, F1, KK3, 13, 9);
+ R( al, bl, cl, dl, el, F3, K3, 14, 8 );
+ R( ar, br, cr, dr, er, F1, KK3, 9, 12);
+ R( el, al, bl, cl, dl, F3, K3, 5, 6 );
+ R( er, ar, br, cr, dr, F1, KK3, 7, 5);
+ R( dl, el, al, bl, cl, F3, K3, 6, 5 );
+ R( dr, er, ar, br, cr, F1, KK3, 10, 15);
+ R( cl, dl, el, al, bl, F3, K3, 2, 12 );
+ R( cr, dr, er, ar, br, F1, KK3, 14, 8);
+ R( bl, cl, dl, el, al, F4, K4, 4, 9 );
+ R( br, cr, dr, er, ar, F0, KK4, 12, 8);
+ R( al, bl, cl, dl, el, F4, K4, 0, 15 );
+ R( ar, br, cr, dr, er, F0, KK4, 15, 5);
+ R( el, al, bl, cl, dl, F4, K4, 5, 5 );
+ R( er, ar, br, cr, dr, F0, KK4, 10, 12);
+ R( dl, el, al, bl, cl, F4, K4, 9, 11 );
+ R( dr, er, ar, br, cr, F0, KK4, 4, 9);
+ R( cl, dl, el, al, bl, F4, K4, 7, 6 );
+ R( cr, dr, er, ar, br, F0, KK4, 1, 12);
+ R( bl, cl, dl, el, al, F4, K4, 12, 8 );
+ R( br, cr, dr, er, ar, F0, KK4, 5, 5);
+ R( al, bl, cl, dl, el, F4, K4, 2, 13 );
+ R( ar, br, cr, dr, er, F0, KK4, 8, 14);
+ R( el, al, bl, cl, dl, F4, K4, 10, 12 );
+ R( er, ar, br, cr, dr, F0, KK4, 7, 6);
+ R( dl, el, al, bl, cl, F4, K4, 14, 5 );
+ R( dr, er, ar, br, cr, F0, KK4, 6, 8);
+ R( cl, dl, el, al, bl, F4, K4, 1, 12 );
+ R( cr, dr, er, ar, br, F0, KK4, 2, 13);
+ R( bl, cl, dl, el, al, F4, K4, 3, 13 );
+ R( br, cr, dr, er, ar, F0, KK4, 13, 6);
+ R( al, bl, cl, dl, el, F4, K4, 8, 14 );
+ R( ar, br, cr, dr, er, F0, KK4, 14, 5);
+ R( el, al, bl, cl, dl, F4, K4, 11, 11 );
+ R( er, ar, br, cr, dr, F0, KK4, 0, 15);
+ R( dl, el, al, bl, cl, F4, K4, 6, 8 );
+ R( dr, er, ar, br, cr, F0, KK4, 3, 13);
+ R( cl, dl, el, al, bl, F4, K4, 15, 5 );
+ R( cr, dr, er, ar, br, F0, KK4, 9, 11);
+ R( bl, cl, dl, el, al, F4, K4, 13, 6 );
+ R( br, cr, dr, er, ar, F0, KK4, 11, 11);
+
+ dr += cl + hd->h1;
+ hd->h1 = hd->h2 + dl + er;
+ hd->h2 = hd->h3 + el + ar;
+ hd->h3 = hd->h4 + al + br;
+ hd->h4 = hd->h0 + bl + cr;
+ hd->h0 = dr;
+
+ return /*burn_stack*/ 104+5*sizeof(void*);
+}
+
+
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
+{
+ unsigned int burn;
+
+ do
+ {
+ burn = transform_blk (c, data);
+ data += 64;
+ }
+ while (--nblks);
+
+ return burn;
+}
+
+
+/*
+ * The routine terminates the hash computation and produces the final digest.
+ */
+static void
+rmd160_final( void *context )
+{
+ RMD160_CONTEXT *hd = context;
+ u32 t, th, msb, lsb;
+ byte *p;
+ unsigned int burn;
+
+ t = hd->bctx.nblocks;
+ if (sizeof t == sizeof hd->bctx.nblocks)
+ th = hd->bctx.nblocks_high;
+ else
+ th = hd->bctx.nblocks >> 32;
+
+ /* multiply by 64 to make a byte count */
+ lsb = t << 6;
+ msb = (th << 6) | (t >> 26);
+ /* add the count */
+ t = lsb;
+ if( (lsb += hd->bctx.count) < t )
+ msb++;
+ /* multiply by 8 to make a bit count */
+ t = lsb;
+ lsb <<= 3;
+ msb <<= 3;
+ msb |= t >> 29;
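+  /* lsb/msb now hold the 64-bit message length in bits; like MD4/MD5,
+     RIPEMD-160 appends this length little-endian, low word first.  */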
+
+ if (hd->bctx.count < 56) /* enough room */
+ {
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
+ if (hd->bctx.count < 56)
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+ /* append the 64 bit count */
+ buf_put_le32(hd->bctx.buf + 56, lsb);
+ buf_put_le32(hd->bctx.buf + 60, msb);
+ burn = transform (hd, hd->bctx.buf, 1);
+ }
+ else /* need one extra block */
+ {
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
+ /* fill pad and next block with zeroes */
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+ /* append the 64 bit count */
+ buf_put_le32(hd->bctx.buf + 64 + 56, lsb);
+ buf_put_le32(hd->bctx.buf + 64 + 60, msb);
+ burn = transform (hd, hd->bctx.buf, 2);
+ }
+
+ p = hd->bctx.buf;
+#define X(a) do { buf_put_le32(p, hd->h##a); p += 4; } while(0)
+ X(0);
+ X(1);
+ X(2);
+ X(3);
+ X(4);
+#undef X
+
+ hd->bctx.count = 0;
+
+ _gcry_burn_stack (burn);
+}
+
+static byte *
+rmd160_read( void *context )
+{
+ RMD160_CONTEXT *hd = context;
+
+ return hd->bctx.buf;
+}
+
+
+
+/****************
+ * Shortcut function which puts the hash value of the supplied buffer
+ * into outbuf which must have a size of 20 bytes.
+ */
+void
+_gcry_rmd160_hash_buffer (void *outbuf, const void *buffer, size_t length )
+{
+ RMD160_CONTEXT hd;
+
+ rmd160_init (&hd, 0);
+ _gcry_md_block_write ( &hd, buffer, length );
+ rmd160_final ( &hd );
+ memcpy ( outbuf, hd.bctx.buf, 20 );
+}
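+
+/* For illustration, applications obtain the same digest through the
+   public libgcrypt API (application code, not part of this file):
+
+     #include <gcrypt.h>
+
+     unsigned char digest[20];
+     gcry_md_hash_buffer (GCRY_MD_RMD160, digest, "abc", 3);
+
+   For the input "abc" RIPEMD-160 yields
+   8eb208f7e05d987a9b044a8e98c6b087f15a0bfc.  */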
+
+/* Variant of the above shortcut function using multiple buffers. */
+static void
+_gcry_rmd160_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
+{
+ RMD160_CONTEXT hd;
+
+ rmd160_init (&hd, 0);
+ for (;iovcnt > 0; iov++, iovcnt--)
+ _gcry_md_block_write (&hd,
+ (const char*)iov[0].data + iov[0].off, iov[0].len);
+ rmd160_final ( &hd );
+ memcpy ( outbuf, hd.bctx.buf, 20 );
+}
+
+
+static byte asn[15] = /* Object ID is 1.3.36.3.2.1 */
+ { 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2b, 0x24, 0x03,
+ 0x02, 0x01, 0x05, 0x00, 0x04, 0x14 };
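+
+/* The ASN above is the DER encoded DigestInfo prefix placed in front of
+   the 20 byte digest for PKCS#1 signatures:
+
+     30 21                     SEQUENCE (33 bytes)
+       30 09                   SEQUENCE (9 bytes)
+         06 05 2b 24 03 02 01  OID 1.3.36.3.2.1 (ripemd160)
+         05 00                 NULL
+       04 14                   OCTET STRING (20 bytes; the digest follows)
+*/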
+
+static gcry_md_oid_spec_t oid_spec_rmd160[] =
+ {
+ /* rsaSignatureWithripemd160 */
+ { "1.3.36.3.3.1.2" },
+ /* TeleTrust hash algorithm. */
+ { "1.3.36.3.2.1" },
+ { NULL }
+ };
+
+gcry_md_spec_t _gcry_digest_spec_rmd160 =
+ {
+ GCRY_MD_RMD160, {0, 0},
+ "RIPEMD160", asn, DIM (asn), oid_spec_rmd160, 20,
+ rmd160_init, _gcry_md_block_write, rmd160_final, rmd160_read, NULL,
+ _gcry_rmd160_hash_buffer, _gcry_rmd160_hash_buffers,
+ sizeof (RMD160_CONTEXT)
+ };
diff --git a/comm/third_party/libgcrypt/cipher/rsa-common.c b/comm/third_party/libgcrypt/cipher/rsa-common.c
new file mode 100644
index 0000000000..29b7bc8148
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rsa-common.c
@@ -0,0 +1,1038 @@
+/* rsa-common.c - Supporting functions for RSA
+ * Copyright (C) 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+/* Turn VALUE into an octet string and store it in an allocated buffer
+ at R_FRAME or - if R_FRAME is NULL - copy it into the caller
+ provided buffer SPACE; either SPACE or R_FRAME may be used. If
+ SPACE is not NULL, the caller must provide a buffer of at least
+ NBYTES. If the resulting octet string is shorter than NBYTES pad
+ it to the left with zeroes. If VALUE does not fit into NBYTES
+ return an error code. */
+static gpg_err_code_t
+octet_string_from_mpi (unsigned char **r_frame, void *space,
+ gcry_mpi_t value, size_t nbytes)
+{
+ return _gcry_mpi_to_octet_string (r_frame, space, value, nbytes);
+}
+
+
+
+/* Encode {VALUE,VALUELEN} for an NBITS key using the pkcs#1 block
+ type 2 padding. On success the result is stored as a new MPI at
+ R_RESULT. On error the value at R_RESULT is undefined.
+
+ If {RANDOM_OVERRIDE, RANDOM_OVERRIDE_LEN} is given it is used as
+ the seed instead of using a random string for it. This feature is
+ only useful for regression tests. Note that this value may not
+ contain zero bytes.
+
+ We encode the value in this way:
+
+ 0 2 RND(n bytes) 0 VALUE
+
+ 0 is a marker we unfortunately can't encode because we return an
+ MPI which strips all leading zeroes.
+ 2 is the block type.
+ RND are non-zero random bytes.
+
+ (Note that OpenPGP includes the cipher algorithm and a checksum in
+ VALUE; the caller needs to prepare the value accordingly.)
+ */
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_for_enc (gcry_mpi_t *r_result, unsigned int nbits,
+ const unsigned char *value, size_t valuelen,
+ const unsigned char *random_override,
+ size_t random_override_len)
+{
+ gcry_err_code_t rc = 0;
+ unsigned char *frame = NULL;
+ size_t nframe = (nbits+7) / 8;
+ int i;
+ size_t n;
+ unsigned char *p;
+
+ if (valuelen + 7 > nframe || !nframe)
+ {
+ /* Can't encode a VALUELEN byte value in an NFRAME byte frame. */
+ return GPG_ERR_TOO_SHORT; /* The key is too short. */
+ }
+
+ if ( !(frame = xtrymalloc_secure (nframe)))
+ return gpg_err_code_from_syserror ();
+
+ n = 0;
+ frame[n++] = 0;
+ frame[n++] = 2; /* block type */
+ i = nframe - 3 - valuelen;
+ gcry_assert (i > 0);
+
+ if (random_override)
+ {
+ int j;
+
+ if (random_override_len != i)
+ {
+ xfree (frame);
+ return GPG_ERR_INV_ARG;
+ }
+ /* Check that random does not include a zero byte. */
+ for (j=0; j < random_override_len; j++)
+ if (!random_override[j])
+ {
+ xfree (frame);
+ return GPG_ERR_INV_ARG;
+ }
+ memcpy (frame + n, random_override, random_override_len);
+ n += random_override_len;
+ }
+ else
+ {
+ p = _gcry_random_bytes_secure (i, GCRY_STRONG_RANDOM);
+ /* Replace zero bytes by new values. */
+ for (;;)
+ {
+ int j, k;
+ unsigned char *pp;
+
+ /* Count the zero bytes. */
+ for (j=k=0; j < i; j++)
+ {
+ if (!p[j])
+ k++;
+ }
+ if (!k)
+ break; /* Okay: no (more) zero bytes. */
+
+ k += k/128 + 3; /* Better get some more. */
+ pp = _gcry_random_bytes_secure (k, GCRY_STRONG_RANDOM);
+ for (j=0; j < i && k; )
+ {
+ if (!p[j])
+ p[j] = pp[--k];
+ if (p[j])
+ j++;
+ }
+ xfree (pp);
+ }
+ memcpy (frame+n, p, i);
+ n += i;
+ xfree (p);
+ }
+
+ frame[n++] = 0;
+ memcpy (frame+n, value, valuelen);
+ n += valuelen;
+ gcry_assert (n == nframe);
+
+ rc = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, n, &nframe);
+ if (!rc && DBG_CIPHER)
+ log_mpidump ("PKCS#1 block type 2 encoded data", *r_result);
+ xfree (frame);
+
+ return rc;
+}
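+
+/* For illustration (not a test vector): with a 2048 bit key NFRAME is
+   256, so a 32 byte VALUE is encoded as
+
+     00 02 || 221 non-zero random bytes || 00 || VALUE(32)
+
+   because the random padding gets NFRAME - 3 - VALUELEN = 221 octets.  */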
+
+
+/* Decode a plaintext in VALUE assuming pkcs#1 block type 2 padding.
+ NBITS is the size of the secret key. On success the result is
+ stored as a newly allocated buffer at R_RESULT and its valid length at
+ R_RESULTLEN. On error NULL is stored at R_RESULT. */
+gpg_err_code_t
+_gcry_rsa_pkcs1_decode_for_enc (unsigned char **r_result, size_t *r_resultlen,
+ unsigned int nbits, gcry_mpi_t value)
+{
+ gcry_error_t err;
+ unsigned char *frame = NULL;
+ size_t nframe = (nbits+7) / 8;
+ size_t n;
+
+ *r_result = NULL;
+
+ if ( !(frame = xtrymalloc_secure (nframe)))
+ return gpg_err_code_from_syserror ();
+
+ err = _gcry_mpi_print (GCRYMPI_FMT_USG, frame, nframe, &n, value);
+ if (err)
+ {
+ xfree (frame);
+ return gcry_err_code (err);
+ }
+
+ nframe = n; /* Set NFRAME to the actual length. */
+
+ /* FRAME = 0x00 || 0x02 || PS || 0x00 || M
+
+ pkcs#1 requires that the first byte is zero. Our MPIs usually
+ strip leading zero bytes; thus we are not able to detect them.
+ However due to the way gcry_mpi_print is implemented we may see
+ leading zero bytes nevertheless. We handle this by making the
+ first zero byte optional. */
+ if (nframe < 4)
+ {
+ xfree (frame);
+ return GPG_ERR_ENCODING_PROBLEM; /* Too short. */
+ }
+ n = 0;
+ if (!frame[0])
+ n++;
+ if (frame[n++] != 0x02)
+ {
+ xfree (frame);
+ return GPG_ERR_ENCODING_PROBLEM; /* Wrong block type. */
+ }
+
+ /* Skip the non-zero random bytes and the terminating zero byte. */
+ for (; n < nframe && frame[n] != 0x00; n++)
+ ;
+ if (n+1 >= nframe)
+ {
+ xfree (frame);
+ return GPG_ERR_ENCODING_PROBLEM; /* No zero byte. */
+ }
+ n++; /* Skip the zero byte. */
+
+ /* To avoid an extra allocation we reuse the frame buffer. The only
+ caller of this function will anyway free the result soon. */
+ memmove (frame, frame + n, nframe - n);
+ *r_result = frame;
+ *r_resultlen = nframe - n;
+
+ if (DBG_CIPHER)
+ log_printhex ("value extracted from PKCS#1 block type 2 encoded data",
+ *r_result, *r_resultlen);
+
+ return 0;
+}
+
+
+/* Encode {VALUE,VALUELEN} for an NBITS key and hash algorithm ALGO
+ using the pkcs#1 block type 1 padding. On success the result is
+ stored as a new MPI at R_RESULT. On error the value at R_RESULT is
+ undefined.
+
+ We encode the value in this way:
+
+ 0 1 PAD(n bytes) 0 ASN(asnlen bytes) VALUE(valuelen bytes)
+
+ 0 is a marker we unfortunately can't encode because we return an
+ MPI which strips all leading zeroes.
+ 1 is the block type.
+ PAD consists of 0xff bytes.
+ 0 marks the end of the padding.
+ ASN is the DER encoding of the hash algorithm; along with the VALUE
+ it yields a valid DER encoding.
+
+ (Note that PGP prior to version 2.3 encoded the message digest as:
+ 0 1 MD(16 bytes) 0 PAD(n bytes) 1
+ The MD is always 16 bytes here because it's always MD5. GnuPG
+ does not support pre-v2.3 signatures, but I'm including this
+ comment so the information is easily found if needed.)
+*/
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_for_sig (gcry_mpi_t *r_result, unsigned int nbits,
+ const unsigned char *value, size_t valuelen,
+ int algo)
+{
+ gcry_err_code_t rc = 0;
+ byte asn[100];
+ byte *frame = NULL;
+ size_t nframe = (nbits+7) / 8;
+ int i;
+ size_t n;
+ size_t asnlen, dlen;
+
+ asnlen = DIM(asn);
+ dlen = _gcry_md_get_algo_dlen (algo);
+
+ if (_gcry_md_algo_info (algo, GCRYCTL_GET_ASNOID, asn, &asnlen))
+ {
+ /* We do not yet have an ASN.1 template for this algorithm. */
+ return GPG_ERR_NOT_IMPLEMENTED;
+ }
+
+ if ( valuelen != dlen )
+ {
+ /* Hash value does not match the length of digest for
+ the given algorithm. */
+ return GPG_ERR_CONFLICT;
+ }
+
+ if ( !dlen || dlen + asnlen + 4 > nframe)
+ {
+ /* Can't encode a DLEN byte digest MD into an NFRAME byte
+ frame. */
+ return GPG_ERR_TOO_SHORT;
+ }
+
+ if ( !(frame = xtrymalloc (nframe)) )
+ return gpg_err_code_from_syserror ();
+
+ /* Assemble the pkcs#1 block type 1. */
+ n = 0;
+ frame[n++] = 0;
+ frame[n++] = 1; /* block type */
+ i = nframe - valuelen - asnlen - 3 ;
+ gcry_assert (i > 1);
+ memset (frame+n, 0xff, i );
+ n += i;
+ frame[n++] = 0;
+ memcpy (frame+n, asn, asnlen);
+ n += asnlen;
+ memcpy (frame+n, value, valuelen );
+ n += valuelen;
+ gcry_assert (n == nframe);
+
+ /* Convert it into an MPI. */
+ rc = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, n, &nframe);
+ if (!rc && DBG_CIPHER)
+ log_mpidump ("PKCS#1 block type 1 encoded data", *r_result);
+ xfree (frame);
+
+ return rc;
+}
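+
+/* For illustration (not a test vector): with a 2048 bit key and
+   GCRY_MD_RMD160 (ASNLEN 15, DLEN 20) NFRAME is 256 and the frame is
+
+     00 01 || FF x 218 || 00 || ASN(15) || VALUE(20)
+
+   since NFRAME - VALUELEN - ASNLEN - 3 = 218.  */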
+
+/* Encode {VALUE,VALUELEN} for an NBITS key using the pkcs#1 block
+ type 1 padding. On success the result is stored as a new MPI at
+ R_RESULT. On error the value at R_RESULT is undefined.
+
+ We encode the value in this way:
+
+ 0 1 PAD(n bytes) 0 VALUE(valuelen bytes)
+
+ 0 is a marker we unfortunately can't encode because we return an
+ MPI which strips all leading zeroes.
+ 1 is the block type.
+ PAD consists of 0xff bytes.
+ 0 marks the end of the padding.
+
+ (Note that PGP prior to version 2.3 encoded the message digest as:
+ 0 1 MD(16 bytes) 0 PAD(n bytes) 1
+ The MD is always 16 bytes here because it's always MD5. GnuPG
+ does not support pre-v2.3 signatures, but I'm including this
+ comment so the information is easily found if needed.)
+*/
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_raw_for_sig (gcry_mpi_t *r_result, unsigned int nbits,
+ const unsigned char *value, size_t valuelen)
+{
+ gcry_err_code_t rc = 0;
+ gcry_error_t err;
+ byte *frame = NULL;
+ size_t nframe = (nbits+7) / 8;
+ int i;
+ size_t n;
+
+ if ( !valuelen || valuelen + 4 > nframe)
+ {
+ /* Can't encode a VALUELEN byte value into an NFRAME byte
+ frame. */
+ return GPG_ERR_TOO_SHORT;
+ }
+
+ if ( !(frame = xtrymalloc (nframe)) )
+ return gpg_err_code_from_syserror ();
+
+ /* Assemble the pkcs#1 block type 1. */
+ n = 0;
+ frame[n++] = 0;
+ frame[n++] = 1; /* block type */
+ i = nframe - valuelen - 3 ;
+ gcry_assert (i > 1);
+ memset (frame+n, 0xff, i );
+ n += i;
+ frame[n++] = 0;
+ memcpy (frame+n, value, valuelen );
+ n += valuelen;
+ gcry_assert (n == nframe);
+
+ /* Convert it into an MPI. */
+ err = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, n, &nframe);
+ if (err)
+ rc = gcry_err_code (err);
+ else if (DBG_CIPHER)
+ log_mpidump ("PKCS#1 block type 1 encoded data", *r_result);
+ xfree (frame);
+
+ return rc;
+}
+
+
+/* Mask generation function for OAEP. See RFC-3447 B.2.1. */
+static gcry_err_code_t
+mgf1 (unsigned char *output, size_t outlen, unsigned char *seed, size_t seedlen,
+ int algo)
+{
+ size_t dlen, nbytes, n;
+ int idx;
+ gcry_md_hd_t hd;
+ gcry_err_code_t err;
+
+ err = _gcry_md_open (&hd, algo, 0);
+ if (err)
+ return err;
+
+ dlen = _gcry_md_get_algo_dlen (algo);
+
+ /* We skip step 1 which would be assert(OUTLEN <= 2^32). The loop
+ in step 3 is merged with step 4 by concatenating no more octets
+ than what would fit into OUTPUT. The ceiling for the counter IDX
+ is implemented indirectly. */
+ nbytes = 0; /* Step 2. */
+ idx = 0;
+ while ( nbytes < outlen )
+ {
+ unsigned char c[4], *digest;
+
+ if (idx)
+ _gcry_md_reset (hd);
+
+ c[0] = (idx >> 24) & 0xFF;
+ c[1] = (idx >> 16) & 0xFF;
+ c[2] = (idx >> 8) & 0xFF;
+ c[3] = idx & 0xFF;
+ idx++;
+
+ _gcry_md_write (hd, seed, seedlen);
+ _gcry_md_write (hd, c, 4);
+ digest = _gcry_md_read (hd, 0);
+
+ n = (outlen - nbytes < dlen)? (outlen - nbytes) : dlen;
+ memcpy (output+nbytes, digest, n);
+ nbytes += n;
+ }
+
+ _gcry_md_close (hd);
+ return GPG_ERR_NO_ERROR;
+}
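+
+/* For illustration: to produce OUTLEN = 100 octets from a 20 octet SEED
+   with SHA-1 (DLEN 20) the loop above computes
+
+     T = Hash(SEED || 00000000) || Hash(SEED || 00000001) || ...
+                                || Hash(SEED || 00000004)
+
+   i.e. ceil(100/20) = 5 iterations with the counter appended as a
+   4 byte big-endian suffix, and copies the first 100 octets of T to
+   OUTPUT.  */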
+
+
+/* RFC-3447 (pkcs#1 v2.1) OAEP encoding. NBITS is the length of the
+ key measured in bits. ALGO is the hash function; it must be a
+ valid and usable algorithm. {VALUE,VALUELEN} is the message to
+ encrypt. {LABEL,LABELLEN} is the optional label to be associated
+ with the message, if LABEL is NULL the default is to use the empty
+ string as label. On success the encoded ciphertext is returned at
+ R_RESULT.
+
+ If {RANDOM_OVERRIDE, RANDOM_OVERRIDE_LEN} is given it is used as
+ the seed instead of using a random string for it. This feature is
+ only useful for regression tests.
+
+ Here is figure 1 from the RFC depicting the process:
+
+ +----------+---------+-------+
+ DB = | lHash | PS | M |
+ +----------+---------+-------+
+ |
+ +----------+ V
+ | seed |--> MGF ---> xor
+ +----------+ |
+ | |
+ +--+ V |
+ |00| xor <----- MGF <-----|
+ +--+ | |
+ | | |
+ V V V
+ +--+----------+----------------------------+
+ EM = |00|maskedSeed| maskedDB |
+ +--+----------+----------------------------+
+ */
+gpg_err_code_t
+_gcry_rsa_oaep_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo,
+ const unsigned char *value, size_t valuelen,
+ const unsigned char *label, size_t labellen,
+ const void *random_override, size_t random_override_len)
+{
+ gcry_err_code_t rc = 0;
+ unsigned char *frame = NULL;
+ size_t nframe = (nbits+7) / 8;
+ unsigned char *p;
+ size_t hlen;
+ size_t n;
+
+ *r_result = NULL;
+
+ /* Set defaults for LABEL. */
+ if (!label || !labellen)
+ {
+ label = (const unsigned char*)"";
+ labellen = 0;
+ }
+
+ hlen = _gcry_md_get_algo_dlen (algo);
+
+ /* We skip step 1a which would be to check that LABELLEN is not
+ greater than 2^61-1. See rfc-3447 7.1.1. */
+
+ /* Step 1b. Note that the obsolete rfc-2437 uses the check:
+ valuelen > nframe - 2 * hlen - 1 . */
+ if (valuelen > nframe - 2 * hlen - 2 || !nframe)
+ {
+ /* Can't encode a VALUELEN byte value in an NFRAME byte frame. */
+ return GPG_ERR_TOO_SHORT; /* The key is too short. */
+ }
+
+ /* Allocate the frame. */
+ frame = xtrycalloc_secure (1, nframe);
+ if (!frame)
+ return gpg_err_code_from_syserror ();
+
+ /* Step 2a: Compute the hash of the label. We store it in the frame
+ where later the maskedDB will commence. */
+ _gcry_md_hash_buffer (algo, frame + 1 + hlen, label, labellen);
+
+ /* Step 2b: Set octet string to zero. */
+ /* This has already been done while allocating FRAME. */
+
+ /* Step 2c: Create DB by concatenating lHash, PS, 0x01 and M. */
+ n = nframe - valuelen - 1;
+ frame[n] = 0x01;
+ memcpy (frame + n + 1, value, valuelen);
+
+ /* Step 2d: Generate seed. We store it where the maskedSeed will go
+ later. */
+ if (random_override)
+ {
+ if (random_override_len != hlen)
+ {
+ xfree (frame);
+ return GPG_ERR_INV_ARG;
+ }
+ memcpy (frame + 1, random_override, hlen);
+ }
+ else
+ _gcry_randomize (frame + 1, hlen, GCRY_STRONG_RANDOM);
+
+ /* Step 2e and 2f: Create maskedDB. */
+ {
+ unsigned char *dmask;
+
+ dmask = xtrymalloc_secure (nframe - hlen - 1);
+ if (!dmask)
+ {
+ rc = gpg_err_code_from_syserror ();
+ xfree (frame);
+ return rc;
+ }
+ rc = mgf1 (dmask, nframe - hlen - 1, frame+1, hlen, algo);
+ if (rc)
+ {
+ xfree (dmask);
+ xfree (frame);
+ return rc;
+ }
+ for (n = 1 + hlen, p = dmask; n < nframe; n++)
+ frame[n] ^= *p++;
+ xfree (dmask);
+ }
+
+ /* Step 2g and 2h: Create maskedSeed. */
+ {
+ unsigned char *smask;
+
+ smask = xtrymalloc_secure (hlen);
+ if (!smask)
+ {
+ rc = gpg_err_code_from_syserror ();
+ xfree (frame);
+ return rc;
+ }
+ rc = mgf1 (smask, hlen, frame + 1 + hlen, nframe - hlen - 1, algo);
+ if (rc)
+ {
+ xfree (smask);
+ xfree (frame);
+ return rc;
+ }
+ for (n = 1, p = smask; n < 1 + hlen; n++)
+ frame[n] ^= *p++;
+ xfree (smask);
+ }
+
+ /* Step 2i: Concatenate 0x00, maskedSeed and maskedDB. */
+ /* This has already been done by using in-place operations. */
+
+ /* Convert the stuff into an MPI as expected by the caller. */
+ rc = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, nframe, NULL);
+ if (!rc && DBG_CIPHER)
+ log_mpidump ("OAEP encoded data", *r_result);
+ xfree (frame);
+
+ return rc;
+}
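+
+/* For illustration of the sizes used above: with a 2048 bit key and
+   SHA-1 (HLEN 20) NFRAME is 256; the seed occupies frame[1..20] and
+   the DB part frame[21..255] (235 octets) with
+
+     DB = lHash(20) || PS (zero octets) || 01 || M
+
+   so the longest message that fits is NFRAME - 2*HLEN - 2 = 214
+   octets.  */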
+
+
+/* RFC-3447 (pkcs#1 v2.1) OAEP decoding. NBITS is the length of the
+ key measured in bits. ALGO is the hash function; it must be a
+ valid and usable algorithm. VALUE is the raw decrypted message
+ {LABEL,LABELLEN} is the optional label to be associated with the
+ message, if LABEL is NULL the default is to use the empty string as
+ label. On success the plaintext is returned as a newly allocated
+ buffer at R_RESULT; its valid length is stored at R_RESULTLEN. On
+ error NULL is stored at R_RESULT. */
+gpg_err_code_t
+_gcry_rsa_oaep_decode (unsigned char **r_result, size_t *r_resultlen,
+ unsigned int nbits, int algo,
+ gcry_mpi_t value,
+ const unsigned char *label, size_t labellen)
+{
+ gcry_err_code_t rc;
+ unsigned char *frame = NULL; /* Encoded messages (EM). */
+ unsigned char *masked_seed; /* Points into FRAME. */
+ unsigned char *masked_db; /* Points into FRAME. */
+ unsigned char *seed = NULL; /* Allocated space for the seed and DB. */
+ unsigned char *db; /* Points into SEED. */
+ unsigned char *lhash = NULL; /* Hash of the label. */
+ size_t nframe; /* Length of the ciphertext (EM). */
+ size_t hlen; /* Length of the hash digest. */
+ size_t db_len; /* Length of DB and masked_db. */
+ size_t nkey = (nbits+7)/8; /* Length of the key in bytes. */
+ int failed = 0; /* Error indicator. */
+ size_t n;
+
+ *r_result = NULL;
+
+ /* This code is implemented as described by rfc-3447 7.1.2. */
+
+ /* Set defaults for LABEL. */
+ if (!label || !labellen)
+ {
+ label = (const unsigned char*)"";
+ labellen = 0;
+ }
+
+ /* Get the length of the digest. */
+ hlen = _gcry_md_get_algo_dlen (algo);
+
+ /* Hash the label right away. */
+ lhash = xtrymalloc (hlen);
+ if (!lhash)
+ return gpg_err_code_from_syserror ();
+ _gcry_md_hash_buffer (algo, lhash, label, labellen);
+
+ /* Turn the MPI into an octet string. If the octet string is
+ shorter than the key we pad it to the left with zeroes. This may
+ happen due to the leading zero in OAEP frames and due to the
+ following random octets (seed^mask) which may have leading zero
+ bytes. This all is needed to cope with our leading zeroes
+ suppressing MPI implementation. The code implicitly implements
+ Step 1b (bail out if NFRAME != N). */
+ rc = octet_string_from_mpi (&frame, NULL, value, nkey);
+ if (rc)
+ {
+ xfree (lhash);
+ return GPG_ERR_ENCODING_PROBLEM;
+ }
+ nframe = nkey;
+
+ /* Step 1c: Check that the key is long enough. */
+ if ( nframe < 2 * hlen + 2 )
+ {
+ xfree (frame);
+ xfree (lhash);
+ return GPG_ERR_ENCODING_PROBLEM;
+ }
+
+ /* Step 2 has already been done by the caller and the
+ octet_string_from_mpi call above. */
+
+ /* Allocate space for SEED and DB. */
+ seed = xtrymalloc_secure (nframe - 1);
+ if (!seed)
+ {
+ rc = gpg_err_code_from_syserror ();
+ xfree (frame);
+ xfree (lhash);
+ return rc;
+ }
+ db = seed + hlen;
+
+ /* To avoid chosen ciphertext attacks from now on we make sure to
+ run all code even in the error case; this avoids possible timing
+ attacks as described by Manger. */
+
+ /* Step 3a: Hash the label. */
+ /* This has already been done. */
+
+ /* Step 3b: Separate the encoded message. */
+ masked_seed = frame + 1;
+ masked_db = frame + 1 + hlen;
+ db_len = nframe - 1 - hlen;
+
+ /* Step 3c and 3d: seed = maskedSeed ^ mgf(maskedDB, hlen). */
+ if (mgf1 (seed, hlen, masked_db, db_len, algo))
+ failed = 1;
+ for (n = 0; n < hlen; n++)
+ seed[n] ^= masked_seed[n];
+
+ /* Step 3e and 3f: db = maskedDB ^ mgf(seed, db_len). */
+ if (mgf1 (db, db_len, seed, hlen, algo))
+ failed = 1;
+ for (n = 0; n < db_len; n++)
+ db[n] ^= masked_db[n];
+
+ /* Step 3g: Check lhash, a possibly empty padding string terminated
+ by 0x01, and that the first byte of EM is 0. */
+ if (memcmp (lhash, db, hlen))
+ failed = 1;
+ for (n = hlen; n < db_len; n++)
+ if (db[n] == 0x01)
+ break;
+ if (n == db_len)
+ failed = 1;
+ if (frame[0])
+ failed = 1;
+
+ xfree (lhash);
+ xfree (frame);
+ if (failed)
+ {
+ xfree (seed);
+ return GPG_ERR_ENCODING_PROBLEM;
+ }
+
+ /* Step 4: Output M. */
+ /* To avoid an extra allocation we reuse the seed buffer. The only
+ caller of this function will anyway free the result soon. */
+ n++;
+ memmove (seed, db + n, db_len - n);
+ *r_result = seed;
+ *r_resultlen = db_len - n;
+ seed = NULL;
+
+ if (DBG_CIPHER)
+ log_printhex ("value extracted from OAEP encoded data",
+ *r_result, *r_resultlen);
+
+ return 0;
+}
+
+
+/* RFC-3447 (pkcs#1 v2.1) PSS encoding. Encode {VALUE,VALUELEN} for
+ an NBITS key. Note that VALUE is already the mHash from the
+ picture below. ALGO is a valid hash algorithm and SALTLEN is the
+ length of salt to be used. On success the result is stored as a
+ new MPI at R_RESULT. On error the value at R_RESULT is undefined.
+
+ If {RANDOM_OVERRIDE, RANDOM_OVERRIDE_LEN} is given it is used as
+ the salt instead of using a random string for the salt. This
+ feature is only useful for regression tests.
+
+ Here is figure 2 from the RFC (errata 595 applied) depicting the
+ process:
+
+ +-----------+
+ | M |
+ +-----------+
+ |
+ V
+ Hash
+ |
+ V
+ +--------+----------+----------+
+ M' = |Padding1| mHash | salt |
+ +--------+----------+----------+
+ |
+ +--------+----------+ V
+ DB = |Padding2| salt | Hash
+ +--------+----------+ |
+ | |
+ V | +----+
+ xor <--- MGF <---| |0xbc|
+ | | +----+
+ | | |
+ V V V
+ +-------------------+----------+----+
+ EM = | maskedDB | H |0xbc|
+ +-------------------+----------+----+
+
+ */
+gpg_err_code_t
+_gcry_rsa_pss_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo,
+ const unsigned char *value, size_t valuelen, int saltlen,
+ const void *random_override, size_t random_override_len)
+{
+ gcry_err_code_t rc = 0;
+ size_t hlen; /* Length of the hash digest. */
+ unsigned char *em = NULL; /* Encoded message. */
+ size_t emlen = (nbits+7)/8; /* Length in bytes of EM. */
+ unsigned char *h; /* Points into EM. */
+ unsigned char *buf = NULL; /* Help buffer. */
+ size_t buflen; /* Length of BUF. */
+ unsigned char *mhash; /* Points into BUF. */
+ unsigned char *salt; /* Points into BUF. */
+ unsigned char *dbmask; /* Points into BUF. */
+ unsigned char *p;
+ size_t n;
+
+ /* This code is implemented as described by rfc-3447 9.1.1. */
+
+ /* Get the length of the digest. */
+ hlen = _gcry_md_get_algo_dlen (algo);
+ gcry_assert (hlen); /* We expect a valid ALGO here. */
+
+ /* Allocate a help buffer and setup some pointers. */
+ buflen = 8 + hlen + saltlen + (emlen - hlen - 1);
+ buf = xtrymalloc (buflen);
+ if (!buf)
+ {
+ rc = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+ mhash = buf + 8;
+ salt = mhash + hlen;
+ dbmask= salt + saltlen;
+
+ /* Step 2: That would be: mHash = Hash(M) but our input is already
+ mHash, thus we only do a consistency check and copy it to MHASH. */
+ if (valuelen != hlen)
+ {
+ rc = GPG_ERR_INV_LENGTH;
+ goto leave;
+ }
+ memcpy (mhash, value, hlen);
+
+ /* Step 3: Check length constraints. */
+ if (emlen < hlen + saltlen + 2)
+ {
+ rc = GPG_ERR_TOO_SHORT;
+ goto leave;
+ }
+
+ /* Allocate space for EM. */
+ em = xtrymalloc (emlen);
+ if (!em)
+ {
+ rc = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+ h = em + emlen - 1 - hlen;
+
+ /* Step 4: Create a salt. */
+ if (saltlen)
+ {
+ if (random_override)
+ {
+ if (random_override_len != saltlen)
+ {
+ rc = GPG_ERR_INV_ARG;
+ goto leave;
+ }
+ memcpy (salt, random_override, saltlen);
+ }
+ else
+ _gcry_randomize (salt, saltlen, GCRY_STRONG_RANDOM);
+ }
+
+ /* Step 5 and 6: M' = Hash(Padding1 || mHash || salt). */
+ memset (buf, 0, 8); /* Padding. */
+ _gcry_md_hash_buffer (algo, h, buf, 8 + hlen + saltlen);
+
+ /* Step 7 and 8: DB = PS || 0x01 || salt. */
+ /* Note that we use EM to store DB and later Xor in-place. */
+ p = em + emlen - 1 - hlen - saltlen - 1;
+ memset (em, 0, p - em);
+ *p++ = 0x01;
+ memcpy (p, salt, saltlen);
+
+ /* Step 9: dbmask = MGF(H, emlen - hlen - 1). */
+ mgf1 (dbmask, emlen - hlen - 1, h, hlen, algo);
+
+ /* Step 10: maskedDB = DB ^ dbMask */
+ for (n = 0, p = dbmask; n < emlen - hlen - 1; n++, p++)
+ em[n] ^= *p;
+
+ /* Step 11: Set the leftmost bits to zero. */
+ em[0] &= 0xFF >> (8 * emlen - nbits);
+
+ /* Step 12: EM = maskedDB || H || 0xbc. */
+ em[emlen-1] = 0xbc;
+
+ /* Convert EM into an MPI. */
+ rc = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, em, emlen, NULL);
+ if (!rc && DBG_CIPHER)
+ log_mpidump ("PSS encoded data", *r_result);
+
+ leave:
+ if (em)
+ {
+ wipememory (em, emlen);
+ xfree (em);
+ }
+ if (buf)
+ {
+ wipememory (buf, buflen);
+ xfree (buf);
+ }
+ return rc;
+}
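+
+/* For illustration of the layout produced above: with a 2048 bit key,
+   SHA-1 (HLEN 20) and SALTLEN 20, EMLEN is 256 and
+
+     M' = 8 zero octets || mHash(20) || salt(20)         (48 octets)
+     DB = PS (214 zero octets) || 01 || salt(20)         (235 octets)
+     EM = maskedDB(235) || H(20) || bc                   (256 octets)
+
+   where H = Hash(M') and maskedDB = DB xor MGF1(H, 235).  */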
+
+
+/* Verify a signature assuming PSS padding. VALUE is the hash of the
+ message (mHash) encoded as an MPI; its length must match the digest
+ length of ALGO. ENCODED is the output of the RSA public key
+ function (EM). NBITS is the size of the public key. ALGO is the
+ hash algorithm and SALTLEN is the length of the used salt. The
+ function returns 0 on success or an error code. */
+gpg_err_code_t
+_gcry_rsa_pss_verify (gcry_mpi_t value, gcry_mpi_t encoded,
+ unsigned int nbits, int algo, size_t saltlen)
+{
+ gcry_err_code_t rc = 0;
+ size_t hlen; /* Length of the hash digest. */
+ unsigned char *em = NULL; /* Encoded message. */
+ size_t emlen = (nbits+7)/8; /* Length in bytes of EM. */
+ unsigned char *salt; /* Points into EM. */
+ unsigned char *h; /* Points into EM. */
+ unsigned char *buf = NULL; /* Help buffer. */
+ size_t buflen; /* Length of BUF. */
+ unsigned char *dbmask; /* Points into BUF. */
+ unsigned char *mhash; /* Points into BUF. */
+ unsigned char *p;
+ size_t n;
+
+ /* This code is implemented as described by rfc-3447 9.1.2. */
+
+ /* Get the length of the digest. */
+ hlen = _gcry_md_get_algo_dlen (algo);
+ gcry_assert (hlen); /* We expect a valid ALGO here. */
+
+ /* Allocate a help buffer and setup some pointers.
+ This buffer is used for two purposes:
+ +------------------------------+-------+
+ 1. | dbmask | mHash |
+ +------------------------------+-------+
+ emlen - hlen - 1 hlen
+
+ +----------+-------+---------+-+-------+
+ 2. | padding1 | mHash | salt | | mHash |
+ +----------+-------+---------+-+-------+
+ 8 hlen saltlen hlen
+ */
+ buflen = 8 + hlen + saltlen;
+ if (buflen < emlen - hlen - 1)
+ buflen = emlen - hlen - 1;
+ buflen += hlen;
+ buf = xtrymalloc (buflen);
+ if (!buf)
+ {
+ rc = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+ dbmask = buf;
+ mhash = buf + buflen - hlen;
+
+ /* Step 2: That would be: mHash = Hash(M) but our input is already
+ mHash thus we only need to convert VALUE into MHASH. */
+ rc = octet_string_from_mpi (NULL, mhash, value, hlen);
+ if (rc)
+ goto leave;
+
+ /* Convert the signature into an octet string. */
+ rc = octet_string_from_mpi (&em, NULL, encoded, emlen);
+ if (rc)
+ goto leave;
+
+ /* Step 3: Check length of EM. Because we internally use MPI
+ functions we can't do this properly; EMLEN is always the length
+ of the key because octet_string_from_mpi needs to left pad the
+ result with zero to cope with the fact that our MPIs suppress all
+ leading zeroes. Thus what we test here is merely that the digest
+ and salt lengths fit into the key length. */
+ if (emlen < hlen + saltlen + 2)
+ {
+ rc = GPG_ERR_TOO_SHORT; /* For the hash and saltlen. */
+ goto leave;
+ }
+
+ /* Step 4: Check last octet. */
+ if (em[emlen - 1] != 0xbc)
+ {
+ rc = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+
+ /* Step 5: Split EM. */
+ h = em + emlen - 1 - hlen;
+
+ /* Step 6: Check the leftmost bits. */
+ if ((em[0] & ~(0xFF >> (8 * emlen - nbits))))
+ {
+ rc = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+
+ /* Step 7: dbmask = MGF(H, emlen - hlen - 1). */
+ mgf1 (dbmask, emlen - hlen - 1, h, hlen, algo);
+
+ /* Step 8: maskedDB = DB ^ dbMask. */
+ for (n = 0, p = dbmask; n < emlen - hlen - 1; n++, p++)
+ em[n] ^= *p;
+
+ /* Step 9: Set leftmost bits in DB to zero. */
+ em[0] &= 0xFF >> (8 * emlen - nbits);
+
+ /* Step 10: Check the padding of DB. */
+ for (n = 0; n < emlen - hlen - saltlen - 2 && !em[n]; n++)
+ ;
+ if (n != emlen - hlen - saltlen - 2 || em[n++] != 1)
+ {
+ rc = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+
+ /* Step 11: Extract salt from DB. */
+ salt = em + n;
+
+ /* Step 12: M' = (0x)00 00 00 00 00 00 00 00 || mHash || salt */
+ memset (buf, 0, 8);
+ memcpy (buf+8, mhash, hlen);
+ memcpy (buf+8+hlen, salt, saltlen);
+
+ /* Step 13: H' = Hash(M'). */
+ _gcry_md_hash_buffer (algo, buf, buf, 8 + hlen + saltlen);
+
+ /* Step 14: Check H == H'. */
+ rc = memcmp (h, buf, hlen) ? GPG_ERR_BAD_SIGNATURE : GPG_ERR_NO_ERROR;
+
+ leave:
+ if (em)
+ {
+ wipememory (em, emlen);
+ xfree (em);
+ }
+ if (buf)
+ {
+ wipememory (buf, buflen);
+ xfree (buf);
+ }
+ return rc;
+}
diff --git a/comm/third_party/libgcrypt/cipher/rsa.c b/comm/third_party/libgcrypt/cipher/rsa.c
new file mode 100644
index 0000000000..575ea94924
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rsa.c
@@ -0,0 +1,2035 @@
+/* rsa.c - RSA implementation
+ * Copyright (C) 1997, 1998, 1999 by Werner Koch (dd9jn)
+ * Copyright (C) 2000, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* This code uses an algorithm protected by U.S. Patent #4,405,829
+ which expired on September 20, 2000. The patent holder placed that
+ patent into the public domain on Sep 6th, 2000.
+*/
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+typedef struct
+{
+ gcry_mpi_t n; /* modulus */
+ gcry_mpi_t e; /* exponent */
+} RSA_public_key;
+
+
+typedef struct
+{
+ gcry_mpi_t n; /* public modulus */
+ gcry_mpi_t e; /* public exponent */
+ gcry_mpi_t d; /* exponent */
+ gcry_mpi_t p; /* prime p. */
+ gcry_mpi_t q; /* prime q. */
+ gcry_mpi_t u; /* inverse of p mod q. */
+} RSA_secret_key;
+
+
+static const char *rsa_names[] =
+ {
+ "rsa",
+ "openpgp-rsa",
+ "oid.1.2.840.113549.1.1.1",
+ NULL,
+ };
+
+
+/* A sample 2048 bit RSA key used for the selftests. */
+static const char sample_secret_key[] =
+" (private-key"
+" (rsa"
+" (n #009F56231A3D82E3E7D613D59D53E9AB921BEF9F08A782AED0B6E46ADBC853EC"
+" 7C71C422435A3CD8FA0DB9EFD55CD3295BADC4E8E2E2B94E15AE82866AB8ADE8"
+" 7E469FAE76DC3577DE87F1F419C4EB41123DFAF8D16922D5EDBAD6E9076D5A1C"
+" 958106F0AE5E2E9193C6B49124C64C2A241C4075D4AF16299EB87A6585BAE917"
+" DEF27FCDD165764D069BC18D16527B29DAAB549F7BBED4A7C6A842D203ED6613"
+" 6E2411744E432CD26D940132F25874483DCAEECDFD95744819CBCF1EA810681C"
+" 42907EBCB1C7EAFBE75C87EC32C5413EA10476545D3FC7B2ADB1B66B7F200918"
+" 664B0E5261C2895AA28B0DE321E921B3F877172CCCAB81F43EF98002916156F6CB#)"
+" (e #010001#)"
+" (d #07EF82500C403899934FE993AC5A36F14FF2DF38CF1EF315F205EE4C83EDAA19"
+" 8890FC23DE9AA933CAFB37B6A8A8DBA675411958337287310D3FF2F1DDC0CB93"
+" 7E70F57F75F833C021852B631D2B9A520E4431A03C5C3FCB5742DCD841D9FB12"
+" 771AA1620DCEC3F1583426066ED9DC3F7028C5B59202C88FDF20396E2FA0EC4F"
+" 5A22D9008F3043673931BC14A5046D6327398327900867E39CC61B2D1AFE2F48"
+" EC8E1E3861C68D257D7425F4E6F99ABD77D61F10CA100EFC14389071831B33DD"
+" 69CC8EABEF860D1DC2AAA84ABEAE5DFC91BC124DAF0F4C8EF5BBEA436751DE84"
+" 3A8063E827A024466F44C28614F93B0732A100D4A0D86D532FE1E22C7725E401#)"
+" (p #00C29D438F115825779631CD665A5739367F3E128ADC29766483A46CA80897E0"
+" 79B32881860B8F9A6A04C2614A904F6F2578DAE13EA67CD60AE3D0AA00A1FF9B"
+" 441485E44B2DC3D0B60260FBFE073B5AC72FAF67964DE15C8212C389D20DB9CF"
+" 54AF6AEF5C4196EAA56495DD30CF709F499D5AB30CA35E086C2A1589D6283F1783#)"
+" (q #00D1984135231CB243FE959C0CBEF551EDD986AD7BEDF71EDF447BE3DA27AF46"
+" 79C974A6FA69E4D52FE796650623DE70622862713932AA2FD9F2EC856EAEAA77"
+" 88B4EA6084DC81C902F014829B18EA8B2666EC41586818E0589E18876065F97E"
+" 8D22CE2DA53A05951EC132DCEF41E70A9C35F4ACC268FFAC2ADF54FA1DA110B919#)"
+" (u #67CF0FD7635205DD80FA814EE9E9C267C17376BF3209FB5D1BC42890D2822A04"
+" 479DAF4D5B6ED69D0F8D1AF94164D07F8CD52ECEFE880641FA0F41DDAB1785E4"
+" A37A32F997A516480B4CD4F6482B9466A1765093ED95023CA32D5EDC1E34CEE9"
+" AF595BC51FE43C4BF810FA225AF697FB473B83815966188A4312C048B885E3F7#)))";
+
+/* A sample 2048 bit RSA key used for the selftests (public only). */
+static const char sample_public_key[] =
+" (public-key"
+" (rsa"
+" (n #009F56231A3D82E3E7D613D59D53E9AB921BEF9F08A782AED0B6E46ADBC853EC"
+" 7C71C422435A3CD8FA0DB9EFD55CD3295BADC4E8E2E2B94E15AE82866AB8ADE8"
+" 7E469FAE76DC3577DE87F1F419C4EB41123DFAF8D16922D5EDBAD6E9076D5A1C"
+" 958106F0AE5E2E9193C6B49124C64C2A241C4075D4AF16299EB87A6585BAE917"
+" DEF27FCDD165764D069BC18D16527B29DAAB549F7BBED4A7C6A842D203ED6613"
+" 6E2411744E432CD26D940132F25874483DCAEECDFD95744819CBCF1EA810681C"
+" 42907EBCB1C7EAFBE75C87EC32C5413EA10476545D3FC7B2ADB1B66B7F200918"
+" 664B0E5261C2895AA28B0DE321E921B3F877172CCCAB81F43EF98002916156F6CB#)"
+" (e #010001#)))";
+
+
+static int test_keys (RSA_secret_key *sk, unsigned nbits);
+static int check_secret_key (RSA_secret_key *sk);
+static void public (gcry_mpi_t output, gcry_mpi_t input, RSA_public_key *skey);
+static void secret (gcry_mpi_t output, gcry_mpi_t input, RSA_secret_key *skey);
+static unsigned int rsa_get_nbits (gcry_sexp_t parms);
+
+
+/* Check that a freshly generated key actually works. Returns 0 on success. */
+static int
+test_keys (RSA_secret_key *sk, unsigned int nbits)
+{
+ int result = -1; /* Default to failure. */
+ RSA_public_key pk;
+ gcry_mpi_t plaintext = mpi_new (nbits);
+ gcry_mpi_t ciphertext = mpi_new (nbits);
+ gcry_mpi_t decr_plaintext = mpi_new (nbits);
+ gcry_mpi_t signature = mpi_new (nbits);
+
+ /* Put the relevant parameters into a public key structure. */
+ pk.n = sk->n;
+ pk.e = sk->e;
+
+ /* Create a random plaintext. */
+ _gcry_mpi_randomize (plaintext, nbits, GCRY_WEAK_RANDOM);
+
+ /* Encrypt using the public key. */
+ public (ciphertext, plaintext, &pk);
+
+ /* Check that the cipher text does not match the plaintext. */
+ if (!mpi_cmp (ciphertext, plaintext))
+ goto leave; /* Ciphertext is identical to the plaintext. */
+
+ /* Decrypt using the secret key. */
+ secret (decr_plaintext, ciphertext, sk);
+
+ /* Check that the decrypted plaintext matches the original plaintext. */
+ if (mpi_cmp (decr_plaintext, plaintext))
+ goto leave; /* Plaintext does not match. */
+
+ /* Create another random plaintext as data for signature checking. */
+ _gcry_mpi_randomize (plaintext, nbits, GCRY_WEAK_RANDOM);
+
+ /* Use the RSA secret function to create a signature of the plaintext. */
+ secret (signature, plaintext, sk);
+
+ /* Use the RSA public function to verify this signature. */
+ public (decr_plaintext, signature, &pk);
+ if (mpi_cmp (decr_plaintext, plaintext))
+ goto leave; /* Signature does not match. */
+
+ /* Modify the signature and check that the signing fails. */
+ mpi_add_ui (signature, signature, 1);
+ public (decr_plaintext, signature, &pk);
+ if (!mpi_cmp (decr_plaintext, plaintext))
+ goto leave; /* Signature matches but should not. */
+
+ result = 0; /* All tests succeeded. */
+
+ leave:
+ _gcry_mpi_release (signature);
+ _gcry_mpi_release (decr_plaintext);
+ _gcry_mpi_release (ciphertext);
+ _gcry_mpi_release (plaintext);
+ return result;
+}
+
+
+/* Callback used by the prime generation to test whether the exponent
+ is suitable. Returns 0 if the test has been passed. */
+static int
+check_exponent (void *arg, gcry_mpi_t a)
+{
+ gcry_mpi_t e = arg;
+ gcry_mpi_t tmp;
+ int result;
+
+ mpi_sub_ui (a, a, 1);
+ tmp = _gcry_mpi_alloc_like (a);
+ result = !mpi_gcd(tmp, e, a); /* GCD is not 1. */
+ _gcry_mpi_release (tmp);
+ mpi_add_ui (a, a, 1);
+ return result;
+}
+
+/****************
+ * Generate a key pair with a key of size NBITS.
+ * USE_E = 0 let Libgcrypt decide what exponent to use.
+ *       = 1 request the use of a "secure" exponent; this is required by some
+ *           specification to be 65537.
+ *       > 2 use this public exponent; if the given exponent is not odd,
+ *           one is internally added to it.
+ * TRANSIENT_KEY: If true, generate the primes using the standard RNG.
+ * Returns: SK filled with all needed values.
+ */
+static gpg_err_code_t
+generate_std (RSA_secret_key *sk, unsigned int nbits, unsigned long use_e,
+ int transient_key)
+{
+ gcry_mpi_t p, q; /* the two primes */
+ gcry_mpi_t d; /* the private key */
+ gcry_mpi_t u;
+ gcry_mpi_t t1, t2;
+ gcry_mpi_t n; /* the public key */
+ gcry_mpi_t e; /* the exponent */
+ gcry_mpi_t phi; /* helper: (p-1)(q-1) */
+ gcry_mpi_t g;
+ gcry_mpi_t f;
+ gcry_random_level_t random_level;
+
+ if (fips_mode ())
+ {
+ if (nbits < 1024)
+ return GPG_ERR_INV_VALUE;
+ if (transient_key)
+ return GPG_ERR_INV_VALUE;
+ }
+
+ /* The random quality depends on the transient_key flag. */
+ random_level = transient_key ? GCRY_STRONG_RANDOM : GCRY_VERY_STRONG_RANDOM;
+
+ /* Make sure that nbits is even so that we generate p, q of equal size. */
+ if ( (nbits&1) )
+ nbits++;
+
+ if (use_e == 1) /* Alias for a secure value */
+ use_e = 65537; /* as demanded by Sphinx. */
+
+ /* Public exponent:
+ In general we use 41 as this is quite fast and more secure than the
+ commonly used 17. Benchmarking the RSA verify function
+ with a 1024 bit key yields (2001-11-08):
+ e=17 0.54 ms
+ e=41 0.75 ms
+ e=257 0.95 ms
+ e=65537 1.80 ms
+ */
+ e = mpi_alloc( (32+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB );
+ if (!use_e)
+ mpi_set_ui (e, 41); /* This is a reasonable secure and fast value */
+ else
+ {
+ use_e |= 1; /* make sure this is odd */
+ mpi_set_ui (e, use_e);
+ }
+
+ n = mpi_new (nbits);
+
+ p = q = NULL;
+ do
+ {
+ /* select two (very secret) primes */
+ if (p)
+ _gcry_mpi_release (p);
+ if (q)
+ _gcry_mpi_release (q);
+ if (use_e)
+ { /* Do an extra test to ensure that the given exponent is
+ suitable. */
+ p = _gcry_generate_secret_prime (nbits/2, random_level,
+ check_exponent, e);
+ q = _gcry_generate_secret_prime (nbits/2, random_level,
+ check_exponent, e);
+ }
+ else
+ { /* We check the exponent later. */
+ p = _gcry_generate_secret_prime (nbits/2, random_level, NULL, NULL);
+ q = _gcry_generate_secret_prime (nbits/2, random_level, NULL, NULL);
+ }
+ if (mpi_cmp (p, q) > 0 ) /* p shall be smaller than q (for calc of u)*/
+ mpi_swap(p,q);
+ /* calculate the modulus */
+ mpi_mul( n, p, q );
+ }
+ while ( mpi_get_nbits(n) != nbits );
+
+ /* calculate Euler totient: phi = (p-1)(q-1) */
+ t1 = mpi_alloc_secure( mpi_get_nlimbs(p) );
+ t2 = mpi_alloc_secure( mpi_get_nlimbs(p) );
+ phi = mpi_snew ( nbits );
+ g = mpi_snew ( nbits );
+ f = mpi_snew ( nbits );
+ mpi_sub_ui( t1, p, 1 );
+ mpi_sub_ui( t2, q, 1 );
+ mpi_mul( phi, t1, t2 );
+ mpi_gcd (g, t1, t2);
+ mpi_fdiv_q(f, phi, g);
+
+ while (!mpi_gcd(t1, e, phi)) /* (while gcd is not 1) */
+ {
+ if (use_e)
+ BUG (); /* The prime generator already made sure that we
+ never can get to here. */
+ mpi_add_ui (e, e, 2);
+ }
+
+ /* calculate the secret key d = e^-1 mod lcm(p-1,q-1) */
+ d = mpi_snew ( nbits );
+ mpi_invm (d, e, f );
+ /* calculate the inverse of p and q (used for chinese remainder theorem)*/
+ u = mpi_snew ( nbits );
+ mpi_invm(u, p, q );
+
+ if( DBG_CIPHER )
+ {
+ log_mpidump(" p= ", p );
+ log_mpidump(" q= ", q );
+ log_mpidump("phi= ", phi );
+ log_mpidump(" g= ", g );
+ log_mpidump(" f= ", f );
+ log_mpidump(" n= ", n );
+ log_mpidump(" e= ", e );
+ log_mpidump(" d= ", d );
+ log_mpidump(" u= ", u );
+ }
+
+ _gcry_mpi_release (t1);
+ _gcry_mpi_release (t2);
+ _gcry_mpi_release (phi);
+ _gcry_mpi_release (f);
+ _gcry_mpi_release (g);
+
+ sk->n = n;
+ sk->e = e;
+ sk->p = p;
+ sk->q = q;
+ sk->d = d;
+ sk->u = u;
+
+ /* Now we can test our keys. */
+ if (test_keys (sk, nbits - 64))
+ {
+ _gcry_mpi_release (sk->n); sk->n = NULL;
+ _gcry_mpi_release (sk->e); sk->e = NULL;
+ _gcry_mpi_release (sk->p); sk->p = NULL;
+ _gcry_mpi_release (sk->q); sk->q = NULL;
+ _gcry_mpi_release (sk->d); sk->d = NULL;
+ _gcry_mpi_release (sk->u); sk->u = NULL;
+ fips_signal_error ("self-test after key generation failed");
+ return GPG_ERR_SELFTEST_FAILED;
+ }
+
+ return 0;
+}
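+
+/* Toy example of the arithmetic above (far too small for real use):
+   with p = 53, q = 61 and e = 17 we get n = 3233, phi = 52*60 = 3120,
+   g = gcd(52,60) = 4, f = phi/g = 780 and d = e^-1 mod f = 413
+   (17*413 = 7021 = 9*780 + 1); u = p^-1 mod q = 38.  Encrypting
+   m = 65 gives c = 65^17 mod 3233 = 2790, and 2790^413 mod 3233 = 65
+   recovers the plaintext.  */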
+
+
+/****************
+ * Generate a key pair with a key of size NBITS.
+ * USE_E = 0 let Libgcrypt decide what exponent to use.
+ *       = 1 request the use of a "secure" exponent; this is required by some
+ *           specification to be 65537.
+ *       > 2 use this public exponent; if the given exponent is not odd,
+ *           one is internally added to it.
+ * TESTPARMS: If set, do not generate new primes but check whether the
+ *            given p and q are probably prime; the returned key is then
+ *            filled with zeroes so as not to break callers.
+ * TRANSIENT_KEY: If true, generate the primes using the standard RNG.
+ * Returns: SK filled with all needed values.
+ */
+static gpg_err_code_t
+generate_fips (RSA_secret_key *sk, unsigned int nbits, unsigned long use_e,
+ gcry_sexp_t testparms, int transient_key)
+{
+ gcry_mpi_t p, q; /* the two primes */
+ gcry_mpi_t d; /* the private key */
+ gcry_mpi_t u;
+ gcry_mpi_t p1, q1;
+ gcry_mpi_t n; /* the public key */
+ gcry_mpi_t e; /* the exponent */
+ gcry_mpi_t g;
+ gcry_mpi_t minp;
+ gcry_mpi_t diff, mindiff;
+ gcry_random_level_t random_level;
+ unsigned int pbits = nbits/2;
+ unsigned int i;
+ int pqswitch;
+ gpg_err_code_t ec = GPG_ERR_NO_PRIME;
+
+ if (nbits < 1024 || (nbits & 0x1FF))
+ return GPG_ERR_INV_VALUE;
+ if (_gcry_enforced_fips_mode() && nbits != 2048 && nbits != 3072)
+ return GPG_ERR_INV_VALUE;
+
+ /* The random quality depends on the transient_key flag. */
+ random_level = transient_key ? GCRY_STRONG_RANDOM : GCRY_VERY_STRONG_RANDOM;
+
+ if (testparms)
+ {
+ /* Parameters to derive the key are given. */
+ /* Note that we explicitly need to setup the values of tbl
+ because some compilers (e.g. OpenWatcom, IRIX) don't allow to
+ initialize a structure with automatic variables. */
+ struct { const char *name; gcry_mpi_t *value; } tbl[] = {
+ { "e" },
+ { "p" },
+ { "q" },
+ { NULL }
+ };
+ int idx;
+ gcry_sexp_t oneparm;
+
+ tbl[0].value = &e;
+ tbl[1].value = &p;
+ tbl[2].value = &q;
+
+ for (idx=0; tbl[idx].name; idx++)
+ {
+ oneparm = sexp_find_token (testparms, tbl[idx].name, 0);
+ if (oneparm)
+ {
+ *tbl[idx].value = sexp_nth_mpi (oneparm, 1, GCRYMPI_FMT_USG);
+ sexp_release (oneparm);
+ }
+ }
+ for (idx=0; tbl[idx].name; idx++)
+ if (!*tbl[idx].value)
+ break;
+ if (tbl[idx].name)
+ {
+ /* At least one parameter is missing. */
+ for (idx=0; tbl[idx].name; idx++)
+ _gcry_mpi_release (*tbl[idx].value);
+ return GPG_ERR_MISSING_VALUE;
+ }
+ }
+ else
+ {
+ if (use_e < 65537)
+ use_e = 65537; /* This is the smallest value allowed by FIPS */
+
+ e = mpi_alloc ((32+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB);
+
+ use_e |= 1; /* make sure this is odd */
+ mpi_set_ui (e, use_e);
+
+ p = mpi_snew (pbits);
+ q = mpi_snew (pbits);
+ }
+
+ n = mpi_new (nbits);
+ d = mpi_snew (nbits);
+ u = mpi_snew (nbits);
+
+ /* prepare approximate minimum p and q */
+ minp = mpi_new (pbits);
+ mpi_set_ui (minp, 0xB504F334);
+ mpi_lshift (minp, minp, pbits - 32);
+
+ /* prepare minimum p and q difference */
+ diff = mpi_new (pbits);
+ mindiff = mpi_new (pbits - 99);
+ mpi_set_ui (mindiff, 1);
+ mpi_lshift (mindiff, mindiff, pbits - 100);
+
+ p1 = mpi_snew (pbits);
+ q1 = mpi_snew (pbits);
+ g = mpi_snew (pbits);
+
+ retry:
+ /* generate p and q */
+ for (i = 0; i < 5 * pbits; i++)
+ {
+ ploop:
+ if (!testparms)
+ {
+ _gcry_mpi_randomize (p, pbits, random_level);
+ }
+ if (mpi_cmp (p, minp) < 0)
+ {
+ if (testparms)
+ goto err;
+ goto ploop;
+ }
+
+ mpi_sub_ui (p1, p, 1);
+ if (mpi_gcd (g, p1, e))
+ {
+ if (_gcry_fips186_4_prime_check (p, pbits) != GPG_ERR_NO_ERROR)
+ {
+ /* not a prime */
+ if (testparms)
+ goto err;
+ }
+ else
+ break;
+ }
+ else if (testparms)
+ goto err;
+ }
+ if (i >= 5 * pbits)
+ goto err;
+
+ for (i = 0; i < 5 * pbits; i++)
+ {
+ qloop:
+ if (!testparms)
+ {
+ _gcry_mpi_randomize (q, pbits, random_level);
+ }
+ if (mpi_cmp (q, minp) < 0)
+ {
+ if (testparms)
+ goto err;
+ goto qloop;
+ }
+ if (mpi_cmp (p, q) > 0)
+ {
+ pqswitch = 1;
+ mpi_sub (diff, p, q);
+ }
+ else
+ {
+ pqswitch = 0;
+ mpi_sub (diff, q, p);
+ }
+ if (mpi_cmp (diff, mindiff) < 0)
+ {
+ if (testparms)
+ goto err;
+ goto qloop;
+ }
+
+ mpi_sub_ui (q1, q, 1);
+ if (mpi_gcd (g, q1, e))
+ {
+ if (_gcry_fips186_4_prime_check (q, pbits) != GPG_ERR_NO_ERROR)
+ {
+ /* not a prime */
+ if (testparms)
+ goto err;
+ }
+ else
+ break;
+ }
+ else if (testparms)
+ goto err;
+ }
+ if (i >= 5 * pbits)
+ goto err;
+
+ if (testparms)
+ {
+ mpi_clear (p);
+ mpi_clear (q);
+ }
+ else
+ {
+ gcry_mpi_t f;
+
+ if (pqswitch)
+ {
+ gcry_mpi_t tmp;
+
+ tmp = p;
+ p = q;
+ q = tmp;
+ }
+
+ f = mpi_snew (nbits);
+
+ /* calculate the modulus */
+ mpi_mul (n, p, q);
+
+ /* calculate the secret key d = e^-1 mod lcm(p-1,q-1) */
+ mpi_gcd (g, p1, q1);
+ mpi_fdiv_q (f, p1, g);
+ mpi_mul (f, f, q1);
+
+ mpi_invm (d, e, f);
+
+ _gcry_mpi_release (f);
+
+ if (mpi_get_nbits (d) < pbits)
+ goto retry;
+
+ /* calculate the inverse of p and q (used for chinese remainder theorem)*/
+ mpi_invm (u, p, q );
+ }
+
+ ec = 0;
+
+ if (DBG_CIPHER)
+ {
+ log_mpidump(" p= ", p );
+ log_mpidump(" q= ", q );
+ log_mpidump(" n= ", n );
+ log_mpidump(" e= ", e );
+ log_mpidump(" d= ", d );
+ log_mpidump(" u= ", u );
+ }
+
+ err:
+
+ _gcry_mpi_release (p1);
+ _gcry_mpi_release (q1);
+ _gcry_mpi_release (g);
+ _gcry_mpi_release (minp);
+ _gcry_mpi_release (mindiff);
+ _gcry_mpi_release (diff);
+
+ sk->n = n;
+ sk->e = e;
+ sk->p = p;
+ sk->q = q;
+ sk->d = d;
+ sk->u = u;
+
+ /* Now we can test our keys. */
+ if (ec || (!testparms && test_keys (sk, nbits - 64)))
+ {
+ _gcry_mpi_release (sk->n); sk->n = NULL;
+ _gcry_mpi_release (sk->e); sk->e = NULL;
+ _gcry_mpi_release (sk->p); sk->p = NULL;
+ _gcry_mpi_release (sk->q); sk->q = NULL;
+ _gcry_mpi_release (sk->d); sk->d = NULL;
+ _gcry_mpi_release (sk->u); sk->u = NULL;
+ if (!ec)
+ {
+ fips_signal_error ("self-test after key generation failed");
+ return GPG_ERR_SELFTEST_FAILED;
+ }
+ }
+
+ return ec;
+}
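+
+/* Note on the constants used above: 0xB504F334 is sqrt(2) scaled to 32
+   bits and rounded up, so MINP approximates sqrt(2)*2^(pbits-1);
+   primes at least that large guarantee that p*q really has NBITS bits.
+   MINDIFF = 2^(pbits-100) enforces the FIPS 186-4 requirement
+   |p - q| > 2^(nbits/2 - 100).  */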
+
+
+/* Helper for generate_x931. */
+static gcry_mpi_t
+gen_x931_parm_xp (unsigned int nbits)
+{
+ gcry_mpi_t xp;
+
+ xp = mpi_snew (nbits);
+ _gcry_mpi_randomize (xp, nbits, GCRY_VERY_STRONG_RANDOM);
+
+ /* The requirement for Xp is:
+
+ sqrt{2}*2^{nbits-1} <= xp <= 2^{nbits} - 1
+
+ We set the two high order bits to 1 to satisfy the lower bound.
+ By using mpi_set_highbit we make sure that the upper bound is
+ satisfied as well. */
+ mpi_set_highbit (xp, nbits-1);
+ mpi_set_bit (xp, nbits-2);
+ gcry_assert ( mpi_get_nbits (xp) == nbits );
+
+ return xp;
+}
+
+
+/* Helper for generate_x931. */
+static gcry_mpi_t
+gen_x931_parm_xi (void)
+{
+ gcry_mpi_t xi;
+
+ xi = mpi_snew (101);
+ _gcry_mpi_randomize (xi, 101, GCRY_VERY_STRONG_RANDOM);
+ mpi_set_highbit (xi, 100);
+ gcry_assert ( mpi_get_nbits (xi) == 101 );
+
+ return xi;
+}
+
+
+
+/* Variant of the standard key generation code using the algorithm
+ from X9.31. Using this algorithm has the advantage that the
+ generation can be made deterministic which is required for CAVS
+ testing. */
+static gpg_err_code_t
+generate_x931 (RSA_secret_key *sk, unsigned int nbits, unsigned long e_value,
+ gcry_sexp_t deriveparms, int *swapped)
+{
+ gcry_mpi_t p, q; /* The two primes. */
+ gcry_mpi_t e; /* The public exponent. */
+ gcry_mpi_t n; /* The public key. */
+ gcry_mpi_t d; /* The private key */
+ gcry_mpi_t u; /* The inverse of p and q. */
+ gcry_mpi_t pm1; /* p - 1 */
+ gcry_mpi_t qm1; /* q - 1 */
+ gcry_mpi_t phi; /* Euler totient. */
+ gcry_mpi_t f, g; /* Helper. */
+
+ *swapped = 0;
+
+ if (e_value == 1) /* Alias for a secure value. */
+ e_value = 65537;
+
+ /* Point 1 of section 4.1: k = 1024 + 256*s with s >= 0 */
+ if (nbits < 1024 || (nbits % 256))
+ return GPG_ERR_INV_VALUE;
+
+ /* Point 2: 2 <= bitlength(e) < 2^{k-2}
+ Note that we do not need to check the upper bound because we use
+ an unsigned long for E and thus there is no way for E to reach
+ that limit. */
+ if (e_value < 3)
+ return GPG_ERR_INV_VALUE;
+
+ /* Our implementation requires E to be odd. */
+ if (!(e_value & 1))
+ return GPG_ERR_INV_VALUE;
+
+ /* Point 3: e > 0, or e = 0 if it is to be randomly generated.
+ We support only a fixed E and thus there is no need for an extra test. */
+
+
+ /* Compute or extract the derive parameters. */
+ {
+ gcry_mpi_t xp1 = NULL;
+ gcry_mpi_t xp2 = NULL;
+ gcry_mpi_t xp = NULL;
+ gcry_mpi_t xq1 = NULL;
+ gcry_mpi_t xq2 = NULL;
+ gcry_mpi_t xq = NULL;
+ gcry_mpi_t tmpval;
+
+ if (!deriveparms)
+ {
+ /* Not given: Generate them. */
+ xp = gen_x931_parm_xp (nbits/2);
+ /* Make sure that |xp - xq| > 2^{nbits - 100} holds. */
+ tmpval = mpi_snew (nbits/2);
+ do
+ {
+ _gcry_mpi_release (xq);
+ xq = gen_x931_parm_xp (nbits/2);
+ mpi_sub (tmpval, xp, xq);
+ }
+ while (mpi_get_nbits (tmpval) <= (nbits/2 - 100));
+ _gcry_mpi_release (tmpval);
+
+ xp1 = gen_x931_parm_xi ();
+ xp2 = gen_x931_parm_xi ();
+ xq1 = gen_x931_parm_xi ();
+ xq2 = gen_x931_parm_xi ();
+
+ }
+ else
+ {
+ /* Parameters to derive the key are given. */
+ /* Note that we explicitly need to setup the values of tbl
+ because some compilers (e.g. OpenWatcom, IRIX) don't allow
+ to initialize a structure with automatic variables. */
+ struct { const char *name; gcry_mpi_t *value; } tbl[] = {
+ { "Xp1" },
+ { "Xp2" },
+ { "Xp" },
+ { "Xq1" },
+ { "Xq2" },
+ { "Xq" },
+ { NULL }
+ };
+ int idx;
+ gcry_sexp_t oneparm;
+
+ tbl[0].value = &xp1;
+ tbl[1].value = &xp2;
+ tbl[2].value = &xp;
+ tbl[3].value = &xq1;
+ tbl[4].value = &xq2;
+ tbl[5].value = &xq;
+
+ for (idx=0; tbl[idx].name; idx++)
+ {
+ oneparm = sexp_find_token (deriveparms, tbl[idx].name, 0);
+ if (oneparm)
+ {
+ *tbl[idx].value = sexp_nth_mpi (oneparm, 1, GCRYMPI_FMT_USG);
+ sexp_release (oneparm);
+ }
+ }
+ for (idx=0; tbl[idx].name; idx++)
+ if (!*tbl[idx].value)
+ break;
+ if (tbl[idx].name)
+ {
+ /* At least one parameter is missing. */
+ for (idx=0; tbl[idx].name; idx++)
+ _gcry_mpi_release (*tbl[idx].value);
+ return GPG_ERR_MISSING_VALUE;
+ }
+ }
+
+ e = mpi_alloc_set_ui (e_value);
+
+ /* Find two prime numbers. */
+ p = _gcry_derive_x931_prime (xp, xp1, xp2, e, NULL, NULL);
+ q = _gcry_derive_x931_prime (xq, xq1, xq2, e, NULL, NULL);
+ _gcry_mpi_release (xp); xp = NULL;
+ _gcry_mpi_release (xp1); xp1 = NULL;
+ _gcry_mpi_release (xp2); xp2 = NULL;
+ _gcry_mpi_release (xq); xq = NULL;
+ _gcry_mpi_release (xq1); xq1 = NULL;
+ _gcry_mpi_release (xq2); xq2 = NULL;
+ if (!p || !q)
+ {
+ _gcry_mpi_release (p);
+ _gcry_mpi_release (q);
+ _gcry_mpi_release (e);
+ return GPG_ERR_NO_PRIME;
+ }
+ }
+
+
+ /* Compute the public modulus. We make sure that p is smaller than
+ q to allow the use of the CRT. */
+ if (mpi_cmp (p, q) > 0 )
+ {
+ mpi_swap (p, q);
+ *swapped = 1;
+ }
+ n = mpi_new (nbits);
+ mpi_mul (n, p, q);
+
+ /* Compute the Euler totient: phi = (p-1)(q-1) */
+ pm1 = mpi_snew (nbits/2);
+ qm1 = mpi_snew (nbits/2);
+ phi = mpi_snew (nbits);
+ mpi_sub_ui (pm1, p, 1);
+ mpi_sub_ui (qm1, q, 1);
+ mpi_mul (phi, pm1, qm1);
+
+ g = mpi_snew (nbits);
+ gcry_assert (mpi_gcd (g, e, phi));
+
+ /* Compute: f = lcm(p-1,q-1) = phi / gcd(p-1,q-1) */
+ mpi_gcd (g, pm1, qm1);
+ f = pm1; pm1 = NULL;
+ _gcry_mpi_release (qm1); qm1 = NULL;
+ mpi_fdiv_q (f, phi, g);
+ _gcry_mpi_release (phi); phi = NULL;
+ d = g; g = NULL;
+ /* Compute the secret key: d = e^{-1} mod lcm(p-1,q-1) */
+ mpi_invm (d, e, f);
+
+ /* Compute the inverse of p and q. */
+ u = f; f = NULL;
+ mpi_invm (u, p, q );
+
+ if( DBG_CIPHER )
+ {
+ if (*swapped)
+ log_debug ("p and q are swapped\n");
+ log_mpidump(" p", p );
+ log_mpidump(" q", q );
+ log_mpidump(" n", n );
+ log_mpidump(" e", e );
+ log_mpidump(" d", d );
+ log_mpidump(" u", u );
+ }
+
+
+ sk->n = n;
+ sk->e = e;
+ sk->p = p;
+ sk->q = q;
+ sk->d = d;
+ sk->u = u;
+
+ /* Now we can test our keys. */
+ if (test_keys (sk, nbits - 64))
+ {
+ _gcry_mpi_release (sk->n); sk->n = NULL;
+ _gcry_mpi_release (sk->e); sk->e = NULL;
+ _gcry_mpi_release (sk->p); sk->p = NULL;
+ _gcry_mpi_release (sk->q); sk->q = NULL;
+ _gcry_mpi_release (sk->d); sk->d = NULL;
+ _gcry_mpi_release (sk->u); sk->u = NULL;
+ fips_signal_error ("self-test after key generation failed");
+ return GPG_ERR_SELFTEST_FAILED;
+ }
+
+ return 0;
+}
+
+
+/****************
+ * Test whether the secret key is valid.
+ * Returns: true if this is a valid key.
+ */
+static int
+check_secret_key( RSA_secret_key *sk )
+{
+ int rc;
+ gcry_mpi_t temp = mpi_alloc( mpi_get_nlimbs(sk->p)*2 );
+
+ mpi_mul(temp, sk->p, sk->q );
+ rc = mpi_cmp( temp, sk->n );
+ mpi_free(temp);
+ return !rc;
+}
+
+
+
+/****************
+ * Public key operation. Encrypt INPUT with PKEY and put result into OUTPUT.
+ *
+ * c = m^e mod n
+ *
+ * Where c is OUTPUT, m is INPUT and e,n are elements of PKEY.
+ */
+static void
+public(gcry_mpi_t output, gcry_mpi_t input, RSA_public_key *pkey )
+{
+ if( output == input ) /* powm doesn't like output and input the same */
+ {
+ gcry_mpi_t x = mpi_alloc( mpi_get_nlimbs(input)*2 );
+ mpi_powm( x, input, pkey->e, pkey->n );
+ mpi_set(output, x);
+ mpi_free(x);
+ }
+ else
+ mpi_powm( output, input, pkey->e, pkey->n );
+}
+
+#if 0
+static void
+stronger_key_check ( RSA_secret_key *skey )
+{
+ gcry_mpi_t t = mpi_alloc_secure ( 0 );
+ gcry_mpi_t t1 = mpi_alloc_secure ( 0 );
+ gcry_mpi_t t2 = mpi_alloc_secure ( 0 );
+ gcry_mpi_t phi = mpi_alloc_secure ( 0 );
+
+ /* check that n == p * q */
+ mpi_mul( t, skey->p, skey->q);
+ if (mpi_cmp( t, skey->n) )
+ log_info ( "RSA Oops: n != p * q\n" );
+
+ /* check that p is less than q */
+ if( mpi_cmp( skey->p, skey->q ) > 0 )
+ {
+ log_info ("RSA Oops: p >= q - fixed\n");
+ _gcry_mpi_swap ( skey->p, skey->q);
+ }
+
+ /* check that e divides neither p-1 nor q-1 */
+ mpi_sub_ui(t, skey->p, 1 );
+ mpi_fdiv_r(t, t, skey->e );
+ if ( !mpi_cmp_ui( t, 0) )
+ log_info ( "RSA Oops: e divides p-1\n" );
+ mpi_sub_ui(t, skey->q, 1 );
+ mpi_fdiv_r(t, t, skey->e );
+ if ( !mpi_cmp_ui( t, 0) )
+ log_info ( "RSA Oops: e divides q-1\n" );
+
+ /* check that d is correct */
+ mpi_sub_ui( t1, skey->p, 1 );
+ mpi_sub_ui( t2, skey->q, 1 );
+ mpi_mul( phi, t1, t2 );
+ gcry_mpi_gcd(t, t1, t2);
+ mpi_fdiv_q(t, phi, t);
+ mpi_invm(t, skey->e, t );
+ if ( mpi_cmp(t, skey->d ) )
+ {
+ log_info ( "RSA Oops: d is wrong - fixed\n");
+ mpi_set (skey->d, t);
+ log_printmpi (" fixed d", skey->d);
+ }
+
+ /* check for correctness of u */
+ mpi_invm(t, skey->p, skey->q );
+ if ( mpi_cmp(t, skey->u ) )
+ {
+ log_info ( "RSA Oops: u is wrong - fixed\n");
+ mpi_set (skey->u, t);
+ log_printmpi (" fixed u", skey->u);
+ }
+
+ log_info ( "RSA secret key check finished\n");
+
+ mpi_free (t);
+ mpi_free (t1);
+ mpi_free (t2);
+ mpi_free (phi);
+}
+#endif
+
+
+
+/* Secret key operation - standard version.
+ *
+ * m = c^d mod n
+ */
+static void
+secret_core_std (gcry_mpi_t M, gcry_mpi_t C,
+ gcry_mpi_t D, gcry_mpi_t N)
+{
+ mpi_powm (M, C, D, N);
+}
+
+
+/* Secret key operation - using the CRT.
+ *
+ * m1 = c ^ (d mod (p-1)) mod p
+ * m2 = c ^ (d mod (q-1)) mod q
+ * h = u * (m2 - m1) mod q
+ * m = m1 + h * p
+ */
+static void
+secret_core_crt (gcry_mpi_t M, gcry_mpi_t C,
+ gcry_mpi_t D, unsigned int Nlimbs,
+ gcry_mpi_t P, gcry_mpi_t Q, gcry_mpi_t U)
+{
+ gcry_mpi_t m1 = mpi_alloc_secure ( Nlimbs + 1 );
+ gcry_mpi_t m2 = mpi_alloc_secure ( Nlimbs + 1 );
+ gcry_mpi_t h = mpi_alloc_secure ( Nlimbs + 1 );
+ gcry_mpi_t D_blind = mpi_alloc_secure ( Nlimbs + 1 );
+ gcry_mpi_t r;
+ unsigned int r_nbits;
+
+ r_nbits = mpi_get_nbits (P) / 4;
+ if (r_nbits < 96)
+ r_nbits = 96;
+ r = mpi_secure_new (r_nbits);
+
+ /* d_blind = (d mod (p-1)) + (p-1) * r */
+ /* m1 = c ^ d_blind mod p */
+ _gcry_mpi_randomize (r, r_nbits, GCRY_WEAK_RANDOM);
+ mpi_set_highbit (r, r_nbits - 1);
+ mpi_sub_ui ( h, P, 1 );
+ mpi_mul ( D_blind, h, r );
+ mpi_fdiv_r ( h, D, h );
+ mpi_add ( D_blind, D_blind, h );
+ mpi_powm ( m1, C, D_blind, P );
+
+ /* d_blind = (d mod (q-1)) + (q-1) * r */
+ /* m2 = c ^ d_blind mod q */
+ _gcry_mpi_randomize (r, r_nbits, GCRY_WEAK_RANDOM);
+ mpi_set_highbit (r, r_nbits - 1);
+ mpi_sub_ui ( h, Q, 1 );
+ mpi_mul ( D_blind, h, r );
+ mpi_fdiv_r ( h, D, h );
+ mpi_add ( D_blind, D_blind, h );
+ mpi_powm ( m2, C, D_blind, Q );
+
+ mpi_free ( r );
+ mpi_free ( D_blind );
+
+ /* h = u * ( m2 - m1 ) mod q */
+ mpi_sub ( h, m2, m1 );
+ if ( mpi_has_sign ( h ) )
+ mpi_add ( h, h, Q );
+ mpi_mulm ( h, U, h, Q );
+
+ /* m = m1 + h * p */
+ mpi_mul ( h, h, P );
+ mpi_add ( M, m1, h );
+
+ mpi_free ( h );
+ mpi_free ( m1 );
+ mpi_free ( m2 );
+}
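+
+/* Worked example (illustrative, not part of libgcrypt): with the toy key
+ * p=61, q=53, e=17, d=2753 and the ciphertext c=2790 (encryption of
+ * m=65), the CRT steps above compute
+ *
+ *   d mod (p-1) = 53  ->  m1 = c^53 mod 61 = 4     (= 65 mod 61)
+ *   d mod (q-1) = 49  ->  m2 = c^49 mod 53 = 12    (= 65 mod 53)
+ *   u = p^-1 mod q = 20,  h = u*(m2 - m1) mod q = 20*8 mod 53 = 1
+ *   m = m1 + h*p = 4 + 1*61 = 65.
+ *
+ * The exponent blinding above (adding a random multiple of p-1 resp. q-1
+ * to the exponent) leaves these results unchanged, because x^(p-1) is
+ * 1 modulo p for any x not divisible by p.  */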
+
+
+/* Secret key operation.
+ * Encrypt INPUT with SKEY and put result into
+ * OUTPUT. SKEY has the secret key parameters.
+ */
+static void
+secret (gcry_mpi_t output, gcry_mpi_t input, RSA_secret_key *skey )
+{
+ /* Remove superfluous leading zeroes from INPUT. */
+ mpi_normalize (input);
+
+ if (!skey->p || !skey->q || !skey->u)
+ {
+ secret_core_std (output, input, skey->d, skey->n);
+ }
+ else
+ {
+ secret_core_crt (output, input, skey->d, mpi_get_nlimbs (skey->n),
+ skey->p, skey->q, skey->u);
+ }
+}
+
+
+static void
+secret_blinded (gcry_mpi_t output, gcry_mpi_t input,
+ RSA_secret_key *sk, unsigned int nbits)
+{
+ gcry_mpi_t r; /* Random number needed for blinding. */
+ gcry_mpi_t ri; /* Modular multiplicative inverse of r. */
+ gcry_mpi_t bldata; /* Blinded data to decrypt. */
+
+ /* First, we need a random number r between 0 and n - 1, which is
+ * relatively prime to n (i.e. it is neither p nor q). The random
+ * number only needs to be unpredictable, thus we employ the
+ * gcry_create_nonce function by using GCRY_WEAK_RANDOM with
+ * gcry_mpi_randomize. */
+ r = mpi_snew (nbits);
+ ri = mpi_snew (nbits);
+ bldata = mpi_snew (nbits);
+
+ do
+ {
+ _gcry_mpi_randomize (r, nbits, GCRY_WEAK_RANDOM);
+ mpi_mod (r, r, sk->n);
+ }
+ while (!mpi_invm (ri, r, sk->n));
+
+ /* Do blinding. We calculate: y = (x * r^e) mod n, where r is the
+ * random number, e is the public exponent, x is the non-blinded
+ * input data and n is the RSA modulus. */
+ mpi_powm (bldata, r, sk->e, sk->n);
+ mpi_mulm (bldata, bldata, input, sk->n);
+
+ /* Perform decryption. */
+ secret (output, bldata, sk);
+ _gcry_mpi_release (bldata);
+
+ /* Undo blinding. Here we calculate: y = (x * r^-1) mod n, where x
+ * is the blinded decrypted data, ri is the modular multiplicative
+ * inverse of r and n is the RSA modulus. */
+ mpi_mulm (output, output, ri, sk->n);
+
+ _gcry_mpi_release (r);
+ _gcry_mpi_release (ri);
+}
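+
+/* Worked example (illustrative, not part of libgcrypt): base blinding
+ * with the toy key p=61, q=53 (n=3233, e=17, d=2753), the ciphertext
+ * c=2790 (encryption of m=65) and the blinding factor r=2:
+ *
+ *   r^e mod n       = 2^17 mod 3233        = 1752
+ *   blinded input   = c * 1752 mod 3233    = 3017
+ *   blinded result  = 3017^2753 mod 3233   = 130   (= m * r mod n)
+ *   final result    = 130 * r^-1 mod 3233  = 130 * 1617 mod 3233 = 65
+ *
+ * The secret exponentiation thus never operates directly on the
+ * attacker-supplied value, which is what defeats the timing attack
+ * mentioned below in rsa_decrypt.  */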
+
+
+/*********************************************
+ ************** interface ******************
+ *********************************************/
+
+static gcry_err_code_t
+rsa_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
+{
+ gpg_err_code_t ec;
+ unsigned int nbits;
+ unsigned long evalue;
+ RSA_secret_key sk;
+ gcry_sexp_t deriveparms;
+ int flags = 0;
+ gcry_sexp_t l1;
+ gcry_sexp_t swap_info = NULL;
+
+ memset (&sk, 0, sizeof sk);
+
+ ec = _gcry_pk_util_get_nbits (genparms, &nbits);
+ if (ec)
+ return ec;
+
+ ec = _gcry_pk_util_get_rsa_use_e (genparms, &evalue);
+ if (ec)
+ return ec;
+
+ /* Parse the optional flags list. */
+ l1 = sexp_find_token (genparms, "flags", 0);
+ if (l1)
+ {
+ ec = _gcry_pk_util_parse_flaglist (l1, &flags, NULL);
+ sexp_release (l1);
+ if (ec)
+ return ec;
+ }
+
+ deriveparms = (genparms?
+ sexp_find_token (genparms, "derive-parms", 0) : NULL);
+ if (!deriveparms)
+ {
+ /* Parse the optional "use-x931" flag. */
+ l1 = sexp_find_token (genparms, "use-x931", 0);
+ if (l1)
+ {
+ flags |= PUBKEY_FLAG_USE_X931;
+ sexp_release (l1);
+ }
+ }
+
+ if (deriveparms || (flags & PUBKEY_FLAG_USE_X931))
+ {
+ int swapped;
+ ec = generate_x931 (&sk, nbits, evalue, deriveparms, &swapped);
+ sexp_release (deriveparms);
+ if (!ec && swapped)
+ ec = sexp_new (&swap_info, "(misc-key-info(p-q-swapped))", 0, 1);
+ }
+ else
+ {
+ /* Parse the optional "transient-key" flag. */
+ if (!(flags & PUBKEY_FLAG_TRANSIENT_KEY))
+ {
+ l1 = sexp_find_token (genparms, "transient-key", 0);
+ if (l1)
+ {
+ flags |= PUBKEY_FLAG_TRANSIENT_KEY;
+ sexp_release (l1);
+ }
+ }
+ deriveparms = (genparms? sexp_find_token (genparms, "test-parms", 0)
+ /**/ : NULL);
+
+ /* Generate. */
+ if (deriveparms || fips_mode())
+ {
+ ec = generate_fips (&sk, nbits, evalue, deriveparms,
+ !!(flags & PUBKEY_FLAG_TRANSIENT_KEY));
+ }
+ else
+ {
+ ec = generate_std (&sk, nbits, evalue,
+ !!(flags & PUBKEY_FLAG_TRANSIENT_KEY));
+ }
+ sexp_release (deriveparms);
+ }
+
+ if (!ec)
+ {
+ ec = sexp_build (r_skey, NULL,
+ "(key-data"
+ " (public-key"
+ " (rsa(n%m)(e%m)))"
+ " (private-key"
+ " (rsa(n%m)(e%m)(d%m)(p%m)(q%m)(u%m)))"
+ " %S)",
+ sk.n, sk.e,
+ sk.n, sk.e, sk.d, sk.p, sk.q, sk.u,
+ swap_info);
+ }
+
+ mpi_free (sk.n);
+ mpi_free (sk.e);
+ mpi_free (sk.p);
+ mpi_free (sk.q);
+ mpi_free (sk.d);
+ mpi_free (sk.u);
+ sexp_release (swap_info);
+
+ return ec;
+}
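+
+/* Caller-side sketch (illustrative, not part of libgcrypt): how the
+ * generation code above is typically reached through the public API.
+ * The genparms S-expression follows the libgcrypt manual; treat the
+ * exact token names as an assumption and check the documentation.  */
+#if 0
+#include <gcrypt.h>
+
+static gcry_sexp_t
+toy_generate_rsa_keypair (void)
+{
+  gcry_sexp_t parms = NULL;
+  gcry_sexp_t keypair = NULL;
+
+  /* 2048-bit key with the default public exponent.  */
+  if (!gcry_sexp_build (&parms, NULL, "(genkey (rsa (nbits 4:2048)))")
+      && !gcry_pk_genkey (&keypair, parms))
+    {
+      /* KEYPAIR now holds the "(key-data (public-key ...)
+       * (private-key ...))" structure built by rsa_generate above.  */
+    }
+  gcry_sexp_release (parms);
+  return keypair;
+}
+#endif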
+
+
+static gcry_err_code_t
+rsa_check_secret_key (gcry_sexp_t keyparms)
+{
+ gcry_err_code_t rc;
+ RSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL, NULL};
+
+ /* To check the key we need the optional parameters. */
+ rc = sexp_extract_param (keyparms, NULL, "nedpqu",
+ &sk.n, &sk.e, &sk.d, &sk.p, &sk.q, &sk.u,
+ NULL);
+ if (rc)
+ goto leave;
+
+ if (!check_secret_key (&sk))
+ rc = GPG_ERR_BAD_SECKEY;
+
+ leave:
+ _gcry_mpi_release (sk.n);
+ _gcry_mpi_release (sk.e);
+ _gcry_mpi_release (sk.d);
+ _gcry_mpi_release (sk.p);
+ _gcry_mpi_release (sk.q);
+ _gcry_mpi_release (sk.u);
+ if (DBG_CIPHER)
+ log_debug ("rsa_testkey => %s\n", gpg_strerror (rc));
+ return rc;
+}
+
+
+static gcry_err_code_t
+rsa_encrypt (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
+{
+ gcry_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_mpi_t data = NULL;
+ RSA_public_key pk = {NULL, NULL};
+ gcry_mpi_t ciph = NULL;
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT,
+ rsa_get_nbits (keyparms));
+
+ /* Extract the data. */
+ rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_mpidump ("rsa_encrypt data", data);
+ if (!data || mpi_is_opaque (data))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ /* Extract the key. */
+ rc = sexp_extract_param (keyparms, NULL, "ne", &pk.n, &pk.e, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_mpidump ("rsa_encrypt n", pk.n);
+ log_mpidump ("rsa_encrypt e", pk.e);
+ }
+
+ /* Do RSA computation and build result. */
+ ciph = mpi_new (0);
+ public (ciph, data, &pk);
+ if (DBG_CIPHER)
+ log_mpidump ("rsa_encrypt res", ciph);
+ if ((ctx.flags & PUBKEY_FLAG_FIXEDLEN))
+ {
+ /* We need to make sure to return the correct length to avoid
+ problems with missing leading zeroes. */
+ unsigned char *em;
+ size_t emlen = (mpi_get_nbits (pk.n)+7)/8;
+
+ rc = _gcry_mpi_to_octet_string (&em, NULL, ciph, emlen);
+ if (!rc)
+ {
+ rc = sexp_build (r_ciph, NULL, "(enc-val(rsa(a%b)))", (int)emlen, em);
+ xfree (em);
+ }
+ }
+ else
+ rc = sexp_build (r_ciph, NULL, "(enc-val(rsa(a%m)))", ciph);
+
+ leave:
+ _gcry_mpi_release (ciph);
+ _gcry_mpi_release (pk.n);
+ _gcry_mpi_release (pk.e);
+ _gcry_mpi_release (data);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("rsa_encrypt => %s\n", gpg_strerror (rc));
+ return rc;
+}
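+
+/* Caller-side sketch (illustrative, not part of libgcrypt): encrypting
+ * through the public API ends up in rsa_encrypt above.  The data
+ * S-expression mirrors the "(data (flags raw) (value %s))" form used by
+ * the self-test further below; PUBLIC_KEY is assumed to be a
+ * "(public-key (rsa (n ...)(e ...)))" S-expression.  */
+#if 0
+#include <gcrypt.h>
+
+static gcry_error_t
+toy_rsa_encrypt (gcry_sexp_t *r_ciph, const char *msg, gcry_sexp_t public_key)
+{
+  gcry_sexp_t data = NULL;
+  gcry_error_t err;
+
+  err = gcry_sexp_build (&data, NULL, "(data (flags raw) (value %s))", msg);
+  if (!err)
+    {
+      /* On success *R_CIPH holds "(enc-val (rsa (a ...)))".  */
+      err = gcry_pk_encrypt (r_ciph, data, public_key);
+      gcry_sexp_release (data);
+    }
+  return err;
+}
+#endif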
+
+
+static gcry_err_code_t
+rsa_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
+{
+ gpg_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_sexp_t l1 = NULL;
+ gcry_mpi_t data = NULL;
+ RSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL, NULL};
+ gcry_mpi_t plain = NULL;
+ unsigned char *unpad = NULL;
+ size_t unpadlen = 0;
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT,
+ rsa_get_nbits (keyparms));
+
+ /* Extract the data. */
+ rc = _gcry_pk_util_preparse_encval (s_data, rsa_names, &l1, &ctx);
+ if (rc)
+ goto leave;
+ rc = sexp_extract_param (l1, NULL, "a", &data, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_printmpi ("rsa_decrypt data", data);
+ if (mpi_is_opaque (data))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ /* Extract the key. */
+ rc = sexp_extract_param (keyparms, NULL, "nedp?q?u?",
+ &sk.n, &sk.e, &sk.d, &sk.p, &sk.q, &sk.u,
+ NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_printmpi ("rsa_decrypt n", sk.n);
+ log_printmpi ("rsa_decrypt e", sk.e);
+ if (!fips_mode ())
+ {
+ log_printmpi ("rsa_decrypt d", sk.d);
+ log_printmpi ("rsa_decrypt p", sk.p);
+ log_printmpi ("rsa_decrypt q", sk.q);
+ log_printmpi ("rsa_decrypt u", sk.u);
+ }
+ }
+
+ /* Better make sure that there are no superfluous leading zeroes in
+ the input and it has not been "padded" using multiples of N.
+ This mitigates side-channel attacks (CVE-2013-4576). */
+ mpi_normalize (data);
+ mpi_fdiv_r (data, data, sk.n);
+
+ /* Allocate MPI for the plaintext. */
+ plain = mpi_snew (ctx.nbits);
+
+ /* We use blinding by default to mitigate timing attacks which can
+ be practically mounted over the network as shown by Brumley and
+ Boneh in 2003. */
+ if ((ctx.flags & PUBKEY_FLAG_NO_BLINDING))
+ secret (plain, data, &sk);
+ else
+ secret_blinded (plain, data, &sk, ctx.nbits);
+
+ if (DBG_CIPHER)
+ log_printmpi ("rsa_decrypt res", plain);
+
+ /* Reverse the encoding and build the s-expression. */
+ switch (ctx.encoding)
+ {
+ case PUBKEY_ENC_PKCS1:
+ rc = _gcry_rsa_pkcs1_decode_for_enc (&unpad, &unpadlen, ctx.nbits, plain);
+ mpi_free (plain);
+ plain = NULL;
+ if (!rc)
+ rc = sexp_build (r_plain, NULL, "(value %b)", (int)unpadlen, unpad);
+ break;
+
+ case PUBKEY_ENC_OAEP:
+ rc = _gcry_rsa_oaep_decode (&unpad, &unpadlen,
+ ctx.nbits, ctx.hash_algo,
+ plain, ctx.label, ctx.labellen);
+ mpi_free (plain);
+ plain = NULL;
+ if (!rc)
+ rc = sexp_build (r_plain, NULL, "(value %b)", (int)unpadlen, unpad);
+ break;
+
+ default:
+ /* Raw format. For backward compatibility we need to assume a
+ signed mpi by using the sexp format string "%m". */
+ rc = sexp_build (r_plain, NULL,
+ (ctx.flags & PUBKEY_FLAG_LEGACYRESULT)
+ ? "%m":"(value %m)", plain);
+ break;
+ }
+
+ leave:
+ xfree (unpad);
+ _gcry_mpi_release (plain);
+ _gcry_mpi_release (sk.n);
+ _gcry_mpi_release (sk.e);
+ _gcry_mpi_release (sk.d);
+ _gcry_mpi_release (sk.p);
+ _gcry_mpi_release (sk.q);
+ _gcry_mpi_release (sk.u);
+ _gcry_mpi_release (data);
+ sexp_release (l1);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("rsa_decrypt => %s\n", gpg_strerror (rc));
+ return rc;
+}
+
+
+static gcry_err_code_t
+rsa_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
+{
+ gpg_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_mpi_t data = NULL;
+ RSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL, NULL};
+ RSA_public_key pk;
+ gcry_mpi_t sig = NULL;
+ gcry_mpi_t result = NULL;
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN,
+ rsa_get_nbits (keyparms));
+
+ /* Extract the data. */
+ rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_printmpi ("rsa_sign data", data);
+ if (mpi_is_opaque (data))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ /* Extract the key. */
+ rc = sexp_extract_param (keyparms, NULL, "nedp?q?u?",
+ &sk.n, &sk.e, &sk.d, &sk.p, &sk.q, &sk.u,
+ NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_printmpi ("rsa_sign n", sk.n);
+ log_printmpi ("rsa_sign e", sk.e);
+ if (!fips_mode ())
+ {
+ log_printmpi ("rsa_sign d", sk.d);
+ log_printmpi ("rsa_sign p", sk.p);
+ log_printmpi ("rsa_sign q", sk.q);
+ log_printmpi ("rsa_sign u", sk.u);
+ }
+ }
+
+ /* Do RSA computation. */
+ sig = mpi_new (0);
+ if ((ctx.flags & PUBKEY_FLAG_NO_BLINDING))
+ secret (sig, data, &sk);
+ else
+ secret_blinded (sig, data, &sk, ctx.nbits);
+ if (DBG_CIPHER)
+ log_printmpi ("rsa_sign res", sig);
+
+ /* Check that the created signature is good. This detects a CRT
+ failure, which would otherwise enable Lenstra's fault attack on
+ RSA's use of the CRT. */
+ result = mpi_new (0);
+ pk.n = sk.n;
+ pk.e = sk.e;
+ public (result, sig, &pk);
+ if (mpi_cmp (result, data))
+ {
+ rc = GPG_ERR_BAD_SIGNATURE;
+ goto leave;
+ }
+
+ /* Convert the result. */
+ if ((ctx.flags & PUBKEY_FLAG_FIXEDLEN))
+ {
+ /* We need to make sure to return the correct length to avoid
+ problems with missing leading zeroes. */
+ unsigned char *em;
+ size_t emlen = (mpi_get_nbits (sk.n)+7)/8;
+
+ rc = _gcry_mpi_to_octet_string (&em, NULL, sig, emlen);
+ if (!rc)
+ {
+ rc = sexp_build (r_sig, NULL, "(sig-val(rsa(s%b)))", (int)emlen, em);
+ xfree (em);
+ }
+ }
+ else
+ rc = sexp_build (r_sig, NULL, "(sig-val(rsa(s%M)))", sig);
+
+
+ leave:
+ _gcry_mpi_release (result);
+ _gcry_mpi_release (sig);
+ _gcry_mpi_release (sk.n);
+ _gcry_mpi_release (sk.e);
+ _gcry_mpi_release (sk.d);
+ _gcry_mpi_release (sk.p);
+ _gcry_mpi_release (sk.q);
+ _gcry_mpi_release (sk.u);
+ _gcry_mpi_release (data);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("rsa_sign => %s\n", gpg_strerror (rc));
+ return rc;
+}
+
+
+static gcry_err_code_t
+rsa_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
+{
+ gcry_err_code_t rc;
+ struct pk_encoding_ctx ctx;
+ gcry_sexp_t l1 = NULL;
+ gcry_mpi_t sig = NULL;
+ gcry_mpi_t data = NULL;
+ RSA_public_key pk = { NULL, NULL };
+ gcry_mpi_t result = NULL;
+
+ _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY,
+ rsa_get_nbits (keyparms));
+
+ /* Extract the data. */
+ rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_printmpi ("rsa_verify data", data);
+ if (mpi_is_opaque (data))
+ {
+ rc = GPG_ERR_INV_DATA;
+ goto leave;
+ }
+
+ /* Extract the signature value. */
+ rc = _gcry_pk_util_preparse_sigval (s_sig, rsa_names, &l1, NULL);
+ if (rc)
+ goto leave;
+ rc = sexp_extract_param (l1, NULL, "s", &sig, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ log_printmpi ("rsa_verify sig", sig);
+
+ /* Extract the key. */
+ rc = sexp_extract_param (keyparms, NULL, "ne", &pk.n, &pk.e, NULL);
+ if (rc)
+ goto leave;
+ if (DBG_CIPHER)
+ {
+ log_printmpi ("rsa_verify n", pk.n);
+ log_printmpi ("rsa_verify e", pk.e);
+ }
+
+ /* Do RSA computation and compare. */
+ result = mpi_new (0);
+ public (result, sig, &pk);
+ if (DBG_CIPHER)
+ log_printmpi ("rsa_verify cmp", result);
+ if (ctx.verify_cmp)
+ rc = ctx.verify_cmp (&ctx, result);
+ else
+ rc = mpi_cmp (result, data) ? GPG_ERR_BAD_SIGNATURE : 0;
+
+ leave:
+ _gcry_mpi_release (result);
+ _gcry_mpi_release (pk.n);
+ _gcry_mpi_release (pk.e);
+ _gcry_mpi_release (data);
+ _gcry_mpi_release (sig);
+ sexp_release (l1);
+ _gcry_pk_util_free_encoding_ctx (&ctx);
+ if (DBG_CIPHER)
+ log_debug ("rsa_verify => %s\n", rc?gpg_strerror (rc):"Good");
+ return rc;
+}
+
+
+
+/* Return the number of bits for the key described by PARMS. On error
+ * 0 is returned. The format of PARMS starts with the algorithm name;
+ * for example:
+ *
+ * (rsa
+ * (n <mpi>)
+ * (e <mpi>))
+ *
+ * More parameters may be given but we only need N here.
+ */
+static unsigned int
+rsa_get_nbits (gcry_sexp_t parms)
+{
+ gcry_sexp_t l1;
+ gcry_mpi_t n;
+ unsigned int nbits;
+
+ l1 = sexp_find_token (parms, "n", 1);
+ if (!l1)
+ return 0; /* Parameter N not found. */
+
+ n = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+ sexp_release (l1);
+ nbits = n? mpi_get_nbits (n) : 0;
+ _gcry_mpi_release (n);
+ return nbits;
+}
+
+
+/* Compute a keygrip. MD is the hash context which we are going to
+ update. KEYPARAM is an S-expression with the key parameters, this
+ is usually a public key but may also be a secret key. An example
+ of such an S-expression is:
+
+ (rsa
+ (n #00B...#)
+ (e #010001#))
+
+ PKCS-15 says that for RSA only the modulus should be hashed -
+ however, it is not clear whether this is meant to use the raw bytes
+ (assuming this is an unsigned integer) or whether the leading zero
+ byte required by DER should be prefixed. We hash the raw bytes. */
+static gpg_err_code_t
+compute_keygrip (gcry_md_hd_t md, gcry_sexp_t keyparam)
+{
+ gcry_sexp_t l1;
+ const char *data;
+ size_t datalen;
+
+ l1 = sexp_find_token (keyparam, "n", 1);
+ if (!l1)
+ return GPG_ERR_NO_OBJ;
+
+ data = sexp_nth_data (l1, 1, &datalen);
+ if (!data)
+ {
+ sexp_release (l1);
+ return GPG_ERR_NO_OBJ;
+ }
+
+ _gcry_md_write (md, data, datalen);
+ sexp_release (l1);
+
+ return 0;
+}
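+
+/* Caller-side sketch (illustrative, not part of libgcrypt): the keygrip
+ * computed above is normally obtained via gcry_pk_get_keygrip, which
+ * fills a 20-byte buffer (the documented keygrip length).  */
+#if 0
+#include <stdio.h>
+#include <gcrypt.h>
+
+static void
+toy_print_keygrip (gcry_sexp_t rsa_key)
+{
+  unsigned char grip[20];
+  int i;
+
+  if (gcry_pk_get_keygrip (rsa_key, grip))
+    {
+      for (i = 0; i < 20; i++)
+        printf ("%02x", grip[i]);
+      putchar ('\n');
+    }
+}
+#endif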
+
+
+
+
+/*
+ Self-test section.
+ */
+
+static const char *
+selftest_sign_2048 (gcry_sexp_t pkey, gcry_sexp_t skey)
+{
+ static const char sample_data[] =
+ "(data (flags pkcs1)"
+ " (hash sha256 #11223344556677889900aabbccddeeff"
+ /**/ "102030405060708090a0b0c0d0f01121#))";
+ static const char sample_data_bad[] =
+ "(data (flags pkcs1)"
+ " (hash sha256 #11223344556677889900aabbccddeeff"
+ /**/ "802030405060708090a0b0c0d0f01121#))";
+
+ const char *errtxt = NULL;
+ gcry_error_t err;
+ gcry_sexp_t data = NULL;
+ gcry_sexp_t data_bad = NULL;
+ gcry_sexp_t sig = NULL;
+ /* raw signature data reference */
+ const char ref_data[] =
+ "6252a19a11e1d5155ed9376036277193d644fa239397fff03e9b92d6f86415d6"
+ "d30da9273775f290e580d038295ff8ff89522becccfa6ae870bf76b76df402a8"
+ "54f69347e3db3de8e1e7d4dada281ec556810c7a8ecd0b5f51f9b1c0e7aa7557"
+ "61aa2b8ba5f811304acc6af0eca41fe49baf33bf34eddaf44e21e036ac7f0b68"
+ "03cdef1c60021fb7b5b97ebacdd88ab755ce29af568dbc5728cc6e6eff42618d"
+ "62a0386ca8beed46402bdeeef29b6a3feded906bace411a06a39192bf516ae10"
+ "67e4320fa8ea113968525f4574d022a3ceeaafdc41079efe1f22cc94bf59d8d3"
+ "328085da9674857db56de5978a62394aab48aa3b72e23a1b16260cfd9daafe65";
+ gcry_mpi_t ref_mpi = NULL;
+ gcry_mpi_t sig_mpi = NULL;
+
+ err = sexp_sscan (&data, NULL, sample_data, strlen (sample_data));
+ if (!err)
+ err = sexp_sscan (&data_bad, NULL,
+ sample_data_bad, strlen (sample_data_bad));
+ if (err)
+ {
+ errtxt = "converting data failed";
+ goto leave;
+ }
+
+ err = _gcry_pk_sign (&sig, data, skey);
+ if (err)
+ {
+ errtxt = "signing failed";
+ goto leave;
+ }
+
+ err = _gcry_mpi_scan(&ref_mpi, GCRYMPI_FMT_HEX, ref_data, 0, NULL);
+ if (err)
+ {
+ errtxt = "converting ref_data to mpi failed";
+ goto leave;
+ }
+
+ err = _gcry_sexp_extract_param(sig, "sig-val!rsa", "s", &sig_mpi, NULL);
+ if (err)
+ {
+ errtxt = "extracting signature data failed";
+ goto leave;
+ }
+
+ if (mpi_cmp (sig_mpi, ref_mpi))
+ {
+ errtxt = "signature does not match reference data";
+ goto leave;
+ }
+
+ err = _gcry_pk_verify (sig, data, pkey);
+ if (err)
+ {
+ errtxt = "verify failed";
+ goto leave;
+ }
+ err = _gcry_pk_verify (sig, data_bad, pkey);
+ if (gcry_err_code (err) != GPG_ERR_BAD_SIGNATURE)
+ {
+ errtxt = "bad signature not detected";
+ goto leave;
+ }
+
+
+ leave:
+ sexp_release (sig);
+ sexp_release (data_bad);
+ sexp_release (data);
+ _gcry_mpi_release (ref_mpi);
+ _gcry_mpi_release (sig_mpi);
+ return errtxt;
+}
+
+
+
+/* Given an S-expression ENCR_DATA of the form:
+
+ (enc-val
+ (rsa
+ (a a-value)))
+
+ as returned by gcry_pk_encrypt, return the A-VALUE. On error,
+ return NULL. */
+static gcry_mpi_t
+extract_a_from_sexp (gcry_sexp_t encr_data)
+{
+ gcry_sexp_t l1, l2, l3;
+ gcry_mpi_t a_value;
+
+ l1 = sexp_find_token (encr_data, "enc-val", 0);
+ if (!l1)
+ return NULL;
+ l2 = sexp_find_token (l1, "rsa", 0);
+ sexp_release (l1);
+ if (!l2)
+ return NULL;
+ l3 = sexp_find_token (l2, "a", 0);
+ sexp_release (l2);
+ if (!l3)
+ return NULL;
+ a_value = sexp_nth_mpi (l3, 1, 0);
+ sexp_release (l3);
+
+ return a_value;
+}
+
+
+static const char *
+selftest_encr_2048 (gcry_sexp_t pkey, gcry_sexp_t skey)
+{
+ const char *errtxt = NULL;
+ gcry_error_t err;
+ static const char plaintext[] =
+ "Jim quickly realized that the beautiful gowns are expensive.";
+ gcry_sexp_t plain = NULL;
+ gcry_sexp_t encr = NULL;
+ gcry_mpi_t ciphertext = NULL;
+ gcry_sexp_t decr = NULL;
+ char *decr_plaintext = NULL;
+ gcry_sexp_t tmplist = NULL;
+ /* expected result of encrypting the plaintext with sample_secret_key */
+ static const char ref_data[] =
+ "18022e2593a402a737caaa93b4c7e750e20ca265452980e1d6b7710fbd3e"
+ "7dce72be5c2110fb47691cb38f42170ee3b4a37f2498d4a51567d762585e"
+ "4cb81d04fbc7df4144f8e5eac2d4b8688521b64011f11d7ad53f4c874004"
+ "819856f2e2a6f83d1c9c4e73ac26089789c14482b0b8d44139133c88c4a5"
+ "2dba9dd6d6ffc622666b7d129168333d999706af30a2d7d272db7734e5ed"
+ "fb8c64ea3018af3ad20f4a013a5060cb0f5e72753967bebe294280a6ed0d"
+ "dbd3c4f11d0a8696e9d32a0dc03deb0b5e49b2cbd1503392642d4e1211f3"
+ "e8e2ee38abaa3671ccd57fcde8ca76e85fd2cb77c35706a970a213a27352"
+ "cec92a9604d543ddb5fc478ff50e0622";
+ gcry_mpi_t ref_mpi = NULL;
+
+ /* Put the plaintext into an S-expression. */
+ err = sexp_build (&plain, NULL, "(data (flags raw) (value %s))", plaintext);
+ if (err)
+ {
+ errtxt = "converting data failed";
+ goto leave;
+ }
+
+ /* Encrypt. */
+ err = _gcry_pk_encrypt (&encr, plain, pkey);
+ if (err)
+ {
+ errtxt = "encrypt failed";
+ goto leave;
+ }
+
+ err = _gcry_mpi_scan(&ref_mpi, GCRYMPI_FMT_HEX, ref_data, 0, NULL);
+ if (err)
+ {
+ errtxt = "converting encrydata to mpi failed";
+ goto leave;
+ }
+
+ /* Extract the ciphertext from the returned S-expression. */
+ /*sexp_dump (encr);*/
+ ciphertext = extract_a_from_sexp (encr);
+ if (!ciphertext)
+ {
+ errtxt = "gcry_pk_decrypt returned garbage";
+ goto leave;
+ }
+
+ /* Check that the ciphertext matches the reference data. */
+ /* _gcry_log_printmpi ("plaintext", plaintext); */
+ /* _gcry_log_printmpi ("ciphertxt", ciphertext); */
+ if (mpi_cmp (ref_mpi, ciphertext))
+ {
+ errtxt = "ciphertext doesn't match reference data";
+ goto leave;
+ }
+
+ /* Decrypt. */
+ err = _gcry_pk_decrypt (&decr, encr, skey);
+ if (err)
+ {
+ errtxt = "decrypt failed";
+ goto leave;
+ }
+
+ /* Extract the decrypted data from the S-expression. Note that the
+ output of gcry_pk_decrypt depends on whether a flags list occurs
+ in its input data. Because we passed the output of
+ gcry_pk_encrypt directly to gcry_pk_decrypt, such a flag value
+ won't be there as of today. To be prepared for future changes we
+ take care of it anyway. */
+ tmplist = sexp_find_token (decr, "value", 0);
+ if (tmplist)
+ decr_plaintext = sexp_nth_string (tmplist, 1);
+ else
+ decr_plaintext = sexp_nth_string (decr, 0);
+ if (!decr_plaintext)
+ {
+ errtxt = "decrypt returned no plaintext";
+ goto leave;
+ }
+
+ /* Check that the decrypted plaintext matches the original plaintext. */
+ if (strcmp (plaintext, decr_plaintext))
+ {
+ errtxt = "mismatch";
+ goto leave;
+ }
+
+ leave:
+ sexp_release (tmplist);
+ xfree (decr_plaintext);
+ sexp_release (decr);
+ _gcry_mpi_release (ciphertext);
+ _gcry_mpi_release (ref_mpi);
+ sexp_release (encr);
+ sexp_release (plain);
+ return errtxt;
+}
+
+
+static gpg_err_code_t
+selftests_rsa (selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+ gcry_error_t err;
+ gcry_sexp_t skey = NULL;
+ gcry_sexp_t pkey = NULL;
+
+ /* Convert the S-expressions into the internal representation. */
+ what = "convert";
+ err = sexp_sscan (&skey, NULL, sample_secret_key, strlen (sample_secret_key));
+ if (!err)
+ err = sexp_sscan (&pkey, NULL,
+ sample_public_key, strlen (sample_public_key));
+ if (err)
+ {
+ errtxt = _gcry_strerror (err);
+ goto failed;
+ }
+
+ what = "key consistency";
+ err = _gcry_pk_testkey (skey);
+ if (err)
+ {
+ errtxt = _gcry_strerror (err);
+ goto failed;
+ }
+
+ what = "sign";
+ errtxt = selftest_sign_2048 (pkey, skey);
+ if (errtxt)
+ goto failed;
+
+ what = "encrypt";
+ errtxt = selftest_encr_2048 (pkey, skey);
+ if (errtxt)
+ goto failed;
+
+ sexp_release (pkey);
+ sexp_release (skey);
+ return 0; /* Succeeded. */
+
+ failed:
+ sexp_release (pkey);
+ sexp_release (skey);
+ if (report)
+ report ("pubkey", GCRY_PK_RSA, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Run a full self-test for ALGO and return 0 on success. */
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+ gpg_err_code_t ec;
+
+ (void)extended;
+
+ switch (algo)
+ {
+ case GCRY_PK_RSA:
+ ec = selftests_rsa (report);
+ break;
+ default:
+ ec = GPG_ERR_PUBKEY_ALGO;
+ break;
+
+ }
+ return ec;
+}
+
+
+
+
+gcry_pk_spec_t _gcry_pubkey_spec_rsa =
+ {
+ GCRY_PK_RSA, { 0, 1 },
+ (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR),
+ "RSA", rsa_names,
+ "ne", "nedpqu", "a", "s", "n",
+ rsa_generate,
+ rsa_check_secret_key,
+ rsa_encrypt,
+ rsa_decrypt,
+ rsa_sign,
+ rsa_verify,
+ rsa_get_nbits,
+ run_selftests,
+ compute_keygrip
+ };
diff --git a/comm/third_party/libgcrypt/cipher/salsa20-amd64.S b/comm/third_party/libgcrypt/cipher/salsa20-amd64.S
new file mode 100644
index 0000000000..ae8f27155a
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/salsa20-amd64.S
@@ -0,0 +1,940 @@
+/* salsa20-amd64.S - AMD64 implementation of Salsa20
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Based on public domain implementation by D. J. Bernstein at
+ * http://cr.yp.to/snuffle.html
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SALSA20)
+
+#include "asm-common-amd64.h"
+
+.text
+
+.align 8
+.globl _gcry_salsa20_amd64_keysetup
+ELF(.type _gcry_salsa20_amd64_keysetup,@function;)
+_gcry_salsa20_amd64_keysetup:
+ CFI_STARTPROC();
+ movl 0(%rsi),%r8d
+ movl 4(%rsi),%r9d
+ movl 8(%rsi),%eax
+ movl 12(%rsi),%r10d
+ movl %r8d,20(%rdi)
+ movl %r9d,40(%rdi)
+ movl %eax,60(%rdi)
+ movl %r10d,48(%rdi)
+ cmp $256,%rdx
+ jb .L_kbits128
+.L_kbits256:
+ movl 16(%rsi),%edx
+ movl 20(%rsi),%ecx
+ movl 24(%rsi),%r8d
+ movl 28(%rsi),%esi
+ movl %edx,28(%rdi)
+ movl %ecx,16(%rdi)
+ movl %r8d,36(%rdi)
+ movl %esi,56(%rdi)
+ mov $1634760805,%rsi
+ mov $857760878,%rdx
+ mov $2036477234,%rcx
+ mov $1797285236,%r8
+ movl %esi,0(%rdi)
+ movl %edx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %r8d,12(%rdi)
+ jmp .L_keysetupdone
+.L_kbits128:
+ movl 0(%rsi),%edx
+ movl 4(%rsi),%ecx
+ movl 8(%rsi),%r8d
+ movl 12(%rsi),%esi
+ movl %edx,28(%rdi)
+ movl %ecx,16(%rdi)
+ movl %r8d,36(%rdi)
+ movl %esi,56(%rdi)
+ mov $1634760805,%rsi
+ mov $824206446,%rdx
+ mov $2036477238,%rcx
+ mov $1797285236,%r8
+ movl %esi,0(%rdi)
+ movl %edx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %r8d,12(%rdi)
+.L_keysetupdone:
+ ret
+ CFI_ENDPROC();
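+
+/* Note (illustrative addition, not in the original source): the
+ * immediates loaded above are the Salsa20 constants read as
+ * little-endian 32-bit words: 1634760805, 857760878, 2036477234 and
+ * 1797285236 spell "expa", "nd 3", "2-by", "te k" (sigma, 256-bit
+ * keys), while 824206446 and 2036477238 replace the two middle words
+ * with "nd 1", "6-by" (tau, 128-bit keys). */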
+
+.align 8
+.globl _gcry_salsa20_amd64_ivsetup
+ELF(.type _gcry_salsa20_amd64_ivsetup,@function;)
+_gcry_salsa20_amd64_ivsetup:
+ CFI_STARTPROC();
+ movl 0(%rsi),%r8d
+ movl 4(%rsi),%esi
+ mov $0,%r9
+ mov $0,%rax
+ movl %r8d,24(%rdi)
+ movl %esi,44(%rdi)
+ movl %r9d,32(%rdi)
+ movl %eax,52(%rdi)
+ ret
+ CFI_ENDPROC();
+
+.align 8
+.globl _gcry_salsa20_amd64_encrypt_blocks
+ELF(.type _gcry_salsa20_amd64_encrypt_blocks,@function;)
+_gcry_salsa20_amd64_encrypt_blocks:
+ /*
+ * Modifications to original implementation:
+ * - Number of rounds is passed in register %r8 (for Salsa20/12).
+ * - Length is input as number of blocks, so don't handle tail bytes
+ * (this is done in salsa20.c).
+ */
+ CFI_STARTPROC();
+ push %rbx
+ CFI_PUSH(%rbx);
+ shlq $6, %rcx /* blocks to bytes */
+ mov %r8, %rbx
+ mov %rsp,%r11
+ CFI_DEF_CFA_REGISTER(%r11);
+ sub $384,%rsp
+ and $~31,%rsp
+ mov %rdi,%r8
+ mov %rsi,%rsi
+ mov %rdx,%rdi
+ mov %rcx,%rdx
+ cmp $0,%rdx
+ jbe .L_done
+.L_start:
+ cmp $256,%rdx
+ jb .L_bytes_are_64_128_or_192
+ movdqa 0(%r8),%xmm0
+ pshufd $0x55,%xmm0,%xmm1
+ pshufd $0xaa,%xmm0,%xmm2
+ pshufd $0xff,%xmm0,%xmm3
+ pshufd $0x00,%xmm0,%xmm0
+ movdqa %xmm1,0(%rsp)
+ movdqa %xmm2,16(%rsp)
+ movdqa %xmm3,32(%rsp)
+ movdqa %xmm0,48(%rsp)
+ movdqa 16(%r8),%xmm0
+ pshufd $0xaa,%xmm0,%xmm1
+ pshufd $0xff,%xmm0,%xmm2
+ pshufd $0x00,%xmm0,%xmm3
+ pshufd $0x55,%xmm0,%xmm0
+ movdqa %xmm1,64(%rsp)
+ movdqa %xmm2,80(%rsp)
+ movdqa %xmm3,96(%rsp)
+ movdqa %xmm0,112(%rsp)
+ movdqa 32(%r8),%xmm0
+ pshufd $0xff,%xmm0,%xmm1
+ pshufd $0x55,%xmm0,%xmm2
+ pshufd $0xaa,%xmm0,%xmm0
+ movdqa %xmm1,128(%rsp)
+ movdqa %xmm2,144(%rsp)
+ movdqa %xmm0,160(%rsp)
+ movdqa 48(%r8),%xmm0
+ pshufd $0x00,%xmm0,%xmm1
+ pshufd $0xaa,%xmm0,%xmm2
+ pshufd $0xff,%xmm0,%xmm0
+ movdqa %xmm1,176(%rsp)
+ movdqa %xmm2,192(%rsp)
+ movdqa %xmm0,208(%rsp)
+.L_bytesatleast256:
+ movl 32(%r8),%ecx
+ movl 52(%r8),%r9d
+ movl %ecx,224(%rsp)
+ movl %r9d,240(%rsp)
+ add $1,%ecx
+ adc $0,%r9d
+ movl %ecx,4+224(%rsp)
+ movl %r9d,4+240(%rsp)
+ add $1,%ecx
+ adc $0,%r9d
+ movl %ecx,8+224(%rsp)
+ movl %r9d,8+240(%rsp)
+ add $1,%ecx
+ adc $0,%r9d
+ movl %ecx,12+224(%rsp)
+ movl %r9d,12+240(%rsp)
+ add $1,%ecx
+ adc $0,%r9d
+ movl %ecx,32(%r8)
+ movl %r9d,52(%r8)
+ movq %rdx,288(%rsp)
+ mov %rbx,%rdx
+ movdqa 0(%rsp),%xmm0
+ movdqa 16(%rsp),%xmm1
+ movdqa 32(%rsp),%xmm2
+ movdqa 192(%rsp),%xmm3
+ movdqa 208(%rsp),%xmm4
+ movdqa 64(%rsp),%xmm5
+ movdqa 80(%rsp),%xmm6
+ movdqa 112(%rsp),%xmm7
+ movdqa 128(%rsp),%xmm8
+ movdqa 144(%rsp),%xmm9
+ movdqa 160(%rsp),%xmm10
+ movdqa 240(%rsp),%xmm11
+ movdqa 48(%rsp),%xmm12
+ movdqa 96(%rsp),%xmm13
+ movdqa 176(%rsp),%xmm14
+ movdqa 224(%rsp),%xmm15
+.L_mainloop1:
+ movdqa %xmm1,256(%rsp)
+ movdqa %xmm2,272(%rsp)
+ movdqa %xmm13,%xmm1
+ paddd %xmm12,%xmm1
+ movdqa %xmm1,%xmm2
+ pslld $7,%xmm1
+ pxor %xmm1,%xmm14
+ psrld $25,%xmm2
+ pxor %xmm2,%xmm14
+ movdqa %xmm7,%xmm1
+ paddd %xmm0,%xmm1
+ movdqa %xmm1,%xmm2
+ pslld $7,%xmm1
+ pxor %xmm1,%xmm11
+ psrld $25,%xmm2
+ pxor %xmm2,%xmm11
+ movdqa %xmm12,%xmm1
+ paddd %xmm14,%xmm1
+ movdqa %xmm1,%xmm2
+ pslld $9,%xmm1
+ pxor %xmm1,%xmm15
+ psrld $23,%xmm2
+ pxor %xmm2,%xmm15
+ movdqa %xmm0,%xmm1
+ paddd %xmm11,%xmm1
+ movdqa %xmm1,%xmm2
+ pslld $9,%xmm1
+ pxor %xmm1,%xmm9
+ psrld $23,%xmm2
+ pxor %xmm2,%xmm9
+ movdqa %xmm14,%xmm1
+ paddd %xmm15,%xmm1
+ movdqa %xmm1,%xmm2
+ pslld $13,%xmm1
+ pxor %xmm1,%xmm13
+ psrld $19,%xmm2
+ pxor %xmm2,%xmm13
+ movdqa %xmm11,%xmm1
+ paddd %xmm9,%xmm1
+ movdqa %xmm1,%xmm2
+ pslld $13,%xmm1
+ pxor %xmm1,%xmm7
+ psrld $19,%xmm2
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm1
+ paddd %xmm13,%xmm1
+ movdqa %xmm1,%xmm2
+ pslld $18,%xmm1
+ pxor %xmm1,%xmm12
+ psrld $14,%xmm2
+ pxor %xmm2,%xmm12
+ movdqa 256(%rsp),%xmm1
+ movdqa %xmm12,256(%rsp)
+ movdqa %xmm9,%xmm2
+ paddd %xmm7,%xmm2
+ movdqa %xmm2,%xmm12
+ pslld $18,%xmm2
+ pxor %xmm2,%xmm0
+ psrld $14,%xmm12
+ pxor %xmm12,%xmm0
+ movdqa %xmm5,%xmm2
+ paddd %xmm1,%xmm2
+ movdqa %xmm2,%xmm12
+ pslld $7,%xmm2
+ pxor %xmm2,%xmm3
+ psrld $25,%xmm12
+ pxor %xmm12,%xmm3
+ movdqa 272(%rsp),%xmm2
+ movdqa %xmm0,272(%rsp)
+ movdqa %xmm6,%xmm0
+ paddd %xmm2,%xmm0
+ movdqa %xmm0,%xmm12
+ pslld $7,%xmm0
+ pxor %xmm0,%xmm4
+ psrld $25,%xmm12
+ pxor %xmm12,%xmm4
+ movdqa %xmm1,%xmm0
+ paddd %xmm3,%xmm0
+ movdqa %xmm0,%xmm12
+ pslld $9,%xmm0
+ pxor %xmm0,%xmm10
+ psrld $23,%xmm12
+ pxor %xmm12,%xmm10
+ movdqa %xmm2,%xmm0
+ paddd %xmm4,%xmm0
+ movdqa %xmm0,%xmm12
+ pslld $9,%xmm0
+ pxor %xmm0,%xmm8
+ psrld $23,%xmm12
+ pxor %xmm12,%xmm8
+ movdqa %xmm3,%xmm0
+ paddd %xmm10,%xmm0
+ movdqa %xmm0,%xmm12
+ pslld $13,%xmm0
+ pxor %xmm0,%xmm5
+ psrld $19,%xmm12
+ pxor %xmm12,%xmm5
+ movdqa %xmm4,%xmm0
+ paddd %xmm8,%xmm0
+ movdqa %xmm0,%xmm12
+ pslld $13,%xmm0
+ pxor %xmm0,%xmm6
+ psrld $19,%xmm12
+ pxor %xmm12,%xmm6
+ movdqa %xmm10,%xmm0
+ paddd %xmm5,%xmm0
+ movdqa %xmm0,%xmm12
+ pslld $18,%xmm0
+ pxor %xmm0,%xmm1
+ psrld $14,%xmm12
+ pxor %xmm12,%xmm1
+ movdqa 256(%rsp),%xmm0
+ movdqa %xmm1,256(%rsp)
+ movdqa %xmm4,%xmm1
+ paddd %xmm0,%xmm1
+ movdqa %xmm1,%xmm12
+ pslld $7,%xmm1
+ pxor %xmm1,%xmm7
+ psrld $25,%xmm12
+ pxor %xmm12,%xmm7
+ movdqa %xmm8,%xmm1
+ paddd %xmm6,%xmm1
+ movdqa %xmm1,%xmm12
+ pslld $18,%xmm1
+ pxor %xmm1,%xmm2
+ psrld $14,%xmm12
+ pxor %xmm12,%xmm2
+ movdqa 272(%rsp),%xmm12
+ movdqa %xmm2,272(%rsp)
+ movdqa %xmm14,%xmm1
+ paddd %xmm12,%xmm1
+ movdqa %xmm1,%xmm2
+ pslld $7,%xmm1
+ pxor %xmm1,%xmm5
+ psrld $25,%xmm2
+ pxor %xmm2,%xmm5
+ movdqa %xmm0,%xmm1
+ paddd %xmm7,%xmm1
+ movdqa %xmm1,%xmm2
+ pslld $9,%xmm1
+ pxor %xmm1,%xmm10
+ psrld $23,%xmm2
+ pxor %xmm2,%xmm10
+ movdqa %xmm12,%xmm1
+ paddd %xmm5,%xmm1
+ movdqa %xmm1,%xmm2
+ pslld $9,%xmm1
+ pxor %xmm1,%xmm8
+ psrld $23,%xmm2
+ pxor %xmm2,%xmm8
+ movdqa %xmm7,%xmm1
+ paddd %xmm10,%xmm1
+ movdqa %xmm1,%xmm2
+ pslld $13,%xmm1
+ pxor %xmm1,%xmm4
+ psrld $19,%xmm2
+ pxor %xmm2,%xmm4
+ movdqa %xmm5,%xmm1
+ paddd %xmm8,%xmm1
+ movdqa %xmm1,%xmm2
+ pslld $13,%xmm1
+ pxor %xmm1,%xmm14
+ psrld $19,%xmm2
+ pxor %xmm2,%xmm14
+ movdqa %xmm10,%xmm1
+ paddd %xmm4,%xmm1
+ movdqa %xmm1,%xmm2
+ pslld $18,%xmm1
+ pxor %xmm1,%xmm0
+ psrld $14,%xmm2
+ pxor %xmm2,%xmm0
+ movdqa 256(%rsp),%xmm1
+ movdqa %xmm0,256(%rsp)
+ movdqa %xmm8,%xmm0
+ paddd %xmm14,%xmm0
+ movdqa %xmm0,%xmm2
+ pslld $18,%xmm0
+ pxor %xmm0,%xmm12
+ psrld $14,%xmm2
+ pxor %xmm2,%xmm12
+ movdqa %xmm11,%xmm0
+ paddd %xmm1,%xmm0
+ movdqa %xmm0,%xmm2
+ pslld $7,%xmm0
+ pxor %xmm0,%xmm6
+ psrld $25,%xmm2
+ pxor %xmm2,%xmm6
+ movdqa 272(%rsp),%xmm2
+ movdqa %xmm12,272(%rsp)
+ movdqa %xmm3,%xmm0
+ paddd %xmm2,%xmm0
+ movdqa %xmm0,%xmm12
+ pslld $7,%xmm0
+ pxor %xmm0,%xmm13
+ psrld $25,%xmm12
+ pxor %xmm12,%xmm13
+ movdqa %xmm1,%xmm0
+ paddd %xmm6,%xmm0
+ movdqa %xmm0,%xmm12
+ pslld $9,%xmm0
+ pxor %xmm0,%xmm15
+ psrld $23,%xmm12
+ pxor %xmm12,%xmm15
+ movdqa %xmm2,%xmm0
+ paddd %xmm13,%xmm0
+ movdqa %xmm0,%xmm12
+ pslld $9,%xmm0
+ pxor %xmm0,%xmm9
+ psrld $23,%xmm12
+ pxor %xmm12,%xmm9
+ movdqa %xmm6,%xmm0
+ paddd %xmm15,%xmm0
+ movdqa %xmm0,%xmm12
+ pslld $13,%xmm0
+ pxor %xmm0,%xmm11
+ psrld $19,%xmm12
+ pxor %xmm12,%xmm11
+ movdqa %xmm13,%xmm0
+ paddd %xmm9,%xmm0
+ movdqa %xmm0,%xmm12
+ pslld $13,%xmm0
+ pxor %xmm0,%xmm3
+ psrld $19,%xmm12
+ pxor %xmm12,%xmm3
+ movdqa %xmm15,%xmm0
+ paddd %xmm11,%xmm0
+ movdqa %xmm0,%xmm12
+ pslld $18,%xmm0
+ pxor %xmm0,%xmm1
+ psrld $14,%xmm12
+ pxor %xmm12,%xmm1
+ movdqa %xmm9,%xmm0
+ paddd %xmm3,%xmm0
+ movdqa %xmm0,%xmm12
+ pslld $18,%xmm0
+ pxor %xmm0,%xmm2
+ psrld $14,%xmm12
+ pxor %xmm12,%xmm2
+ movdqa 256(%rsp),%xmm12
+ movdqa 272(%rsp),%xmm0
+ sub $2,%rdx
+ ja .L_mainloop1
+ paddd 48(%rsp),%xmm12
+ paddd 112(%rsp),%xmm7
+ paddd 160(%rsp),%xmm10
+ paddd 208(%rsp),%xmm4
+ movd %xmm12,%rdx
+ movd %xmm7,%rcx
+ movd %xmm10,%r9
+ movd %xmm4,%rax
+ pshufd $0x39,%xmm12,%xmm12
+ pshufd $0x39,%xmm7,%xmm7
+ pshufd $0x39,%xmm10,%xmm10
+ pshufd $0x39,%xmm4,%xmm4
+ xorl 0(%rsi),%edx
+ xorl 4(%rsi),%ecx
+ xorl 8(%rsi),%r9d
+ xorl 12(%rsi),%eax
+ movl %edx,0(%rdi)
+ movl %ecx,4(%rdi)
+ movl %r9d,8(%rdi)
+ movl %eax,12(%rdi)
+ movd %xmm12,%rdx
+ movd %xmm7,%rcx
+ movd %xmm10,%r9
+ movd %xmm4,%rax
+ pshufd $0x39,%xmm12,%xmm12
+ pshufd $0x39,%xmm7,%xmm7
+ pshufd $0x39,%xmm10,%xmm10
+ pshufd $0x39,%xmm4,%xmm4
+ xorl 64(%rsi),%edx
+ xorl 68(%rsi),%ecx
+ xorl 72(%rsi),%r9d
+ xorl 76(%rsi),%eax
+ movl %edx,64(%rdi)
+ movl %ecx,68(%rdi)
+ movl %r9d,72(%rdi)
+ movl %eax,76(%rdi)
+ movd %xmm12,%rdx
+ movd %xmm7,%rcx
+ movd %xmm10,%r9
+ movd %xmm4,%rax
+ pshufd $0x39,%xmm12,%xmm12
+ pshufd $0x39,%xmm7,%xmm7
+ pshufd $0x39,%xmm10,%xmm10
+ pshufd $0x39,%xmm4,%xmm4
+ xorl 128(%rsi),%edx
+ xorl 132(%rsi),%ecx
+ xorl 136(%rsi),%r9d
+ xorl 140(%rsi),%eax
+ movl %edx,128(%rdi)
+ movl %ecx,132(%rdi)
+ movl %r9d,136(%rdi)
+ movl %eax,140(%rdi)
+ movd %xmm12,%rdx
+ movd %xmm7,%rcx
+ movd %xmm10,%r9
+ movd %xmm4,%rax
+ xorl 192(%rsi),%edx
+ xorl 196(%rsi),%ecx
+ xorl 200(%rsi),%r9d
+ xorl 204(%rsi),%eax
+ movl %edx,192(%rdi)
+ movl %ecx,196(%rdi)
+ movl %r9d,200(%rdi)
+ movl %eax,204(%rdi)
+ paddd 176(%rsp),%xmm14
+ paddd 0(%rsp),%xmm0
+ paddd 64(%rsp),%xmm5
+ paddd 128(%rsp),%xmm8
+ movd %xmm14,%rdx
+ movd %xmm0,%rcx
+ movd %xmm5,%r9
+ movd %xmm8,%rax
+ pshufd $0x39,%xmm14,%xmm14
+ pshufd $0x39,%xmm0,%xmm0
+ pshufd $0x39,%xmm5,%xmm5
+ pshufd $0x39,%xmm8,%xmm8
+ xorl 16(%rsi),%edx
+ xorl 20(%rsi),%ecx
+ xorl 24(%rsi),%r9d
+ xorl 28(%rsi),%eax
+ movl %edx,16(%rdi)
+ movl %ecx,20(%rdi)
+ movl %r9d,24(%rdi)
+ movl %eax,28(%rdi)
+ movd %xmm14,%rdx
+ movd %xmm0,%rcx
+ movd %xmm5,%r9
+ movd %xmm8,%rax
+ pshufd $0x39,%xmm14,%xmm14
+ pshufd $0x39,%xmm0,%xmm0
+ pshufd $0x39,%xmm5,%xmm5
+ pshufd $0x39,%xmm8,%xmm8
+ xorl 80(%rsi),%edx
+ xorl 84(%rsi),%ecx
+ xorl 88(%rsi),%r9d
+ xorl 92(%rsi),%eax
+ movl %edx,80(%rdi)
+ movl %ecx,84(%rdi)
+ movl %r9d,88(%rdi)
+ movl %eax,92(%rdi)
+ movd %xmm14,%rdx
+ movd %xmm0,%rcx
+ movd %xmm5,%r9
+ movd %xmm8,%rax
+ pshufd $0x39,%xmm14,%xmm14
+ pshufd $0x39,%xmm0,%xmm0
+ pshufd $0x39,%xmm5,%xmm5
+ pshufd $0x39,%xmm8,%xmm8
+ xorl 144(%rsi),%edx
+ xorl 148(%rsi),%ecx
+ xorl 152(%rsi),%r9d
+ xorl 156(%rsi),%eax
+ movl %edx,144(%rdi)
+ movl %ecx,148(%rdi)
+ movl %r9d,152(%rdi)
+ movl %eax,156(%rdi)
+ movd %xmm14,%rdx
+ movd %xmm0,%rcx
+ movd %xmm5,%r9
+ movd %xmm8,%rax
+ xorl 208(%rsi),%edx
+ xorl 212(%rsi),%ecx
+ xorl 216(%rsi),%r9d
+ xorl 220(%rsi),%eax
+ movl %edx,208(%rdi)
+ movl %ecx,212(%rdi)
+ movl %r9d,216(%rdi)
+ movl %eax,220(%rdi)
+ paddd 224(%rsp),%xmm15
+ paddd 240(%rsp),%xmm11
+ paddd 16(%rsp),%xmm1
+ paddd 80(%rsp),%xmm6
+ movd %xmm15,%rdx
+ movd %xmm11,%rcx
+ movd %xmm1,%r9
+ movd %xmm6,%rax
+ pshufd $0x39,%xmm15,%xmm15
+ pshufd $0x39,%xmm11,%xmm11
+ pshufd $0x39,%xmm1,%xmm1
+ pshufd $0x39,%xmm6,%xmm6
+ xorl 32(%rsi),%edx
+ xorl 36(%rsi),%ecx
+ xorl 40(%rsi),%r9d
+ xorl 44(%rsi),%eax
+ movl %edx,32(%rdi)
+ movl %ecx,36(%rdi)
+ movl %r9d,40(%rdi)
+ movl %eax,44(%rdi)
+ movd %xmm15,%rdx
+ movd %xmm11,%rcx
+ movd %xmm1,%r9
+ movd %xmm6,%rax
+ pshufd $0x39,%xmm15,%xmm15
+ pshufd $0x39,%xmm11,%xmm11
+ pshufd $0x39,%xmm1,%xmm1
+ pshufd $0x39,%xmm6,%xmm6
+ xorl 96(%rsi),%edx
+ xorl 100(%rsi),%ecx
+ xorl 104(%rsi),%r9d
+ xorl 108(%rsi),%eax
+ movl %edx,96(%rdi)
+ movl %ecx,100(%rdi)
+ movl %r9d,104(%rdi)
+ movl %eax,108(%rdi)
+ movd %xmm15,%rdx
+ movd %xmm11,%rcx
+ movd %xmm1,%r9
+ movd %xmm6,%rax
+ pshufd $0x39,%xmm15,%xmm15
+ pshufd $0x39,%xmm11,%xmm11
+ pshufd $0x39,%xmm1,%xmm1
+ pshufd $0x39,%xmm6,%xmm6
+ xorl 160(%rsi),%edx
+ xorl 164(%rsi),%ecx
+ xorl 168(%rsi),%r9d
+ xorl 172(%rsi),%eax
+ movl %edx,160(%rdi)
+ movl %ecx,164(%rdi)
+ movl %r9d,168(%rdi)
+ movl %eax,172(%rdi)
+ movd %xmm15,%rdx
+ movd %xmm11,%rcx
+ movd %xmm1,%r9
+ movd %xmm6,%rax
+ xorl 224(%rsi),%edx
+ xorl 228(%rsi),%ecx
+ xorl 232(%rsi),%r9d
+ xorl 236(%rsi),%eax
+ movl %edx,224(%rdi)
+ movl %ecx,228(%rdi)
+ movl %r9d,232(%rdi)
+ movl %eax,236(%rdi)
+ paddd 96(%rsp),%xmm13
+ paddd 144(%rsp),%xmm9
+ paddd 192(%rsp),%xmm3
+ paddd 32(%rsp),%xmm2
+ movd %xmm13,%rdx
+ movd %xmm9,%rcx
+ movd %xmm3,%r9
+ movd %xmm2,%rax
+ pshufd $0x39,%xmm13,%xmm13
+ pshufd $0x39,%xmm9,%xmm9
+ pshufd $0x39,%xmm3,%xmm3
+ pshufd $0x39,%xmm2,%xmm2
+ xorl 48(%rsi),%edx
+ xorl 52(%rsi),%ecx
+ xorl 56(%rsi),%r9d
+ xorl 60(%rsi),%eax
+ movl %edx,48(%rdi)
+ movl %ecx,52(%rdi)
+ movl %r9d,56(%rdi)
+ movl %eax,60(%rdi)
+ movd %xmm13,%rdx
+ movd %xmm9,%rcx
+ movd %xmm3,%r9
+ movd %xmm2,%rax
+ pshufd $0x39,%xmm13,%xmm13
+ pshufd $0x39,%xmm9,%xmm9
+ pshufd $0x39,%xmm3,%xmm3
+ pshufd $0x39,%xmm2,%xmm2
+ xorl 112(%rsi),%edx
+ xorl 116(%rsi),%ecx
+ xorl 120(%rsi),%r9d
+ xorl 124(%rsi),%eax
+ movl %edx,112(%rdi)
+ movl %ecx,116(%rdi)
+ movl %r9d,120(%rdi)
+ movl %eax,124(%rdi)
+ movd %xmm13,%rdx
+ movd %xmm9,%rcx
+ movd %xmm3,%r9
+ movd %xmm2,%rax
+ pshufd $0x39,%xmm13,%xmm13
+ pshufd $0x39,%xmm9,%xmm9
+ pshufd $0x39,%xmm3,%xmm3
+ pshufd $0x39,%xmm2,%xmm2
+ xorl 176(%rsi),%edx
+ xorl 180(%rsi),%ecx
+ xorl 184(%rsi),%r9d
+ xorl 188(%rsi),%eax
+ movl %edx,176(%rdi)
+ movl %ecx,180(%rdi)
+ movl %r9d,184(%rdi)
+ movl %eax,188(%rdi)
+ movd %xmm13,%rdx
+ movd %xmm9,%rcx
+ movd %xmm3,%r9
+ movd %xmm2,%rax
+ xorl 240(%rsi),%edx
+ xorl 244(%rsi),%ecx
+ xorl 248(%rsi),%r9d
+ xorl 252(%rsi),%eax
+ movl %edx,240(%rdi)
+ movl %ecx,244(%rdi)
+ movl %r9d,248(%rdi)
+ movl %eax,252(%rdi)
+ movq 288(%rsp),%rdx
+ sub $256,%rdx
+ add $256,%rsi
+ add $256,%rdi
+ cmp $256,%rdx
+ jae .L_bytesatleast256
+ cmp $0,%rdx
+ jbe .L_done
+.L_bytes_are_64_128_or_192:
+ movq %rdx,288(%rsp)
+ movdqa 0(%r8),%xmm0
+ movdqa 16(%r8),%xmm1
+ movdqa 32(%r8),%xmm2
+ movdqa 48(%r8),%xmm3
+ movdqa %xmm1,%xmm4
+ mov %rbx,%rdx
+.L_mainloop2:
+ paddd %xmm0,%xmm4
+ movdqa %xmm0,%xmm5
+ movdqa %xmm4,%xmm6
+ pslld $7,%xmm4
+ psrld $25,%xmm6
+ pxor %xmm4,%xmm3
+ pxor %xmm6,%xmm3
+ paddd %xmm3,%xmm5
+ movdqa %xmm3,%xmm4
+ movdqa %xmm5,%xmm6
+ pslld $9,%xmm5
+ psrld $23,%xmm6
+ pxor %xmm5,%xmm2
+ pshufd $0x93,%xmm3,%xmm3
+ pxor %xmm6,%xmm2
+ paddd %xmm2,%xmm4
+ movdqa %xmm2,%xmm5
+ movdqa %xmm4,%xmm6
+ pslld $13,%xmm4
+ psrld $19,%xmm6
+ pxor %xmm4,%xmm1
+ pshufd $0x4e,%xmm2,%xmm2
+ pxor %xmm6,%xmm1
+ paddd %xmm1,%xmm5
+ movdqa %xmm3,%xmm4
+ movdqa %xmm5,%xmm6
+ pslld $18,%xmm5
+ psrld $14,%xmm6
+ pxor %xmm5,%xmm0
+ pshufd $0x39,%xmm1,%xmm1
+ pxor %xmm6,%xmm0
+ paddd %xmm0,%xmm4
+ movdqa %xmm0,%xmm5
+ movdqa %xmm4,%xmm6
+ pslld $7,%xmm4
+ psrld $25,%xmm6
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm1
+ paddd %xmm1,%xmm5
+ movdqa %xmm1,%xmm4
+ movdqa %xmm5,%xmm6
+ pslld $9,%xmm5
+ psrld $23,%xmm6
+ pxor %xmm5,%xmm2
+ pshufd $0x93,%xmm1,%xmm1
+ pxor %xmm6,%xmm2
+ paddd %xmm2,%xmm4
+ movdqa %xmm2,%xmm5
+ movdqa %xmm4,%xmm6
+ pslld $13,%xmm4
+ psrld $19,%xmm6
+ pxor %xmm4,%xmm3
+ pshufd $0x4e,%xmm2,%xmm2
+ pxor %xmm6,%xmm3
+ paddd %xmm3,%xmm5
+ movdqa %xmm1,%xmm4
+ movdqa %xmm5,%xmm6
+ pslld $18,%xmm5
+ psrld $14,%xmm6
+ pxor %xmm5,%xmm0
+ pshufd $0x39,%xmm3,%xmm3
+ pxor %xmm6,%xmm0
+ paddd %xmm0,%xmm4
+ movdqa %xmm0,%xmm5
+ movdqa %xmm4,%xmm6
+ pslld $7,%xmm4
+ psrld $25,%xmm6
+ pxor %xmm4,%xmm3
+ pxor %xmm6,%xmm3
+ paddd %xmm3,%xmm5
+ movdqa %xmm3,%xmm4
+ movdqa %xmm5,%xmm6
+ pslld $9,%xmm5
+ psrld $23,%xmm6
+ pxor %xmm5,%xmm2
+ pshufd $0x93,%xmm3,%xmm3
+ pxor %xmm6,%xmm2
+ paddd %xmm2,%xmm4
+ movdqa %xmm2,%xmm5
+ movdqa %xmm4,%xmm6
+ pslld $13,%xmm4
+ psrld $19,%xmm6
+ pxor %xmm4,%xmm1
+ pshufd $0x4e,%xmm2,%xmm2
+ pxor %xmm6,%xmm1
+ paddd %xmm1,%xmm5
+ movdqa %xmm3,%xmm4
+ movdqa %xmm5,%xmm6
+ pslld $18,%xmm5
+ psrld $14,%xmm6
+ pxor %xmm5,%xmm0
+ pshufd $0x39,%xmm1,%xmm1
+ pxor %xmm6,%xmm0
+ paddd %xmm0,%xmm4
+ movdqa %xmm0,%xmm5
+ movdqa %xmm4,%xmm6
+ pslld $7,%xmm4
+ psrld $25,%xmm6
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm1
+ paddd %xmm1,%xmm5
+ movdqa %xmm1,%xmm4
+ movdqa %xmm5,%xmm6
+ pslld $9,%xmm5
+ psrld $23,%xmm6
+ pxor %xmm5,%xmm2
+ pshufd $0x93,%xmm1,%xmm1
+ pxor %xmm6,%xmm2
+ paddd %xmm2,%xmm4
+ movdqa %xmm2,%xmm5
+ movdqa %xmm4,%xmm6
+ pslld $13,%xmm4
+ psrld $19,%xmm6
+ pxor %xmm4,%xmm3
+ pshufd $0x4e,%xmm2,%xmm2
+ pxor %xmm6,%xmm3
+ sub $4,%rdx
+ paddd %xmm3,%xmm5
+ movdqa %xmm1,%xmm4
+ movdqa %xmm5,%xmm6
+ pslld $18,%xmm5
+ pxor %xmm7,%xmm7
+ psrld $14,%xmm6
+ pxor %xmm5,%xmm0
+ pshufd $0x39,%xmm3,%xmm3
+ pxor %xmm6,%xmm0
+ ja .L_mainloop2
+ paddd 0(%r8),%xmm0
+ paddd 16(%r8),%xmm1
+ paddd 32(%r8),%xmm2
+ paddd 48(%r8),%xmm3
+ movd %xmm0,%rdx
+ movd %xmm1,%rcx
+ movd %xmm2,%rax
+ movd %xmm3,%r10
+ pshufd $0x39,%xmm0,%xmm0
+ pshufd $0x39,%xmm1,%xmm1
+ pshufd $0x39,%xmm2,%xmm2
+ pshufd $0x39,%xmm3,%xmm3
+ xorl 0(%rsi),%edx
+ xorl 48(%rsi),%ecx
+ xorl 32(%rsi),%eax
+ xorl 16(%rsi),%r10d
+ movl %edx,0(%rdi)
+ movl %ecx,48(%rdi)
+ movl %eax,32(%rdi)
+ movl %r10d,16(%rdi)
+ movd %xmm0,%rdx
+ movd %xmm1,%rcx
+ movd %xmm2,%rax
+ movd %xmm3,%r10
+ pshufd $0x39,%xmm0,%xmm0
+ pshufd $0x39,%xmm1,%xmm1
+ pshufd $0x39,%xmm2,%xmm2
+ pshufd $0x39,%xmm3,%xmm3
+ xorl 20(%rsi),%edx
+ xorl 4(%rsi),%ecx
+ xorl 52(%rsi),%eax
+ xorl 36(%rsi),%r10d
+ movl %edx,20(%rdi)
+ movl %ecx,4(%rdi)
+ movl %eax,52(%rdi)
+ movl %r10d,36(%rdi)
+ movd %xmm0,%rdx
+ movd %xmm1,%rcx
+ movd %xmm2,%rax
+ movd %xmm3,%r10
+ pshufd $0x39,%xmm0,%xmm0
+ pshufd $0x39,%xmm1,%xmm1
+ pshufd $0x39,%xmm2,%xmm2
+ pshufd $0x39,%xmm3,%xmm3
+ xorl 40(%rsi),%edx
+ xorl 24(%rsi),%ecx
+ xorl 8(%rsi),%eax
+ xorl 56(%rsi),%r10d
+ movl %edx,40(%rdi)
+ movl %ecx,24(%rdi)
+ movl %eax,8(%rdi)
+ movl %r10d,56(%rdi)
+ movd %xmm0,%rdx
+ movd %xmm1,%rcx
+ movd %xmm2,%rax
+ movd %xmm3,%r10
+ xorl 60(%rsi),%edx
+ xorl 44(%rsi),%ecx
+ xorl 28(%rsi),%eax
+ xorl 12(%rsi),%r10d
+ movl %edx,60(%rdi)
+ movl %ecx,44(%rdi)
+ movl %eax,28(%rdi)
+ movl %r10d,12(%rdi)
+ movq 288(%rsp),%rdx
+ movl 32(%r8),%ecx
+ movl 52(%r8),%eax
+ add $1,%ecx
+ adc $0,%eax
+ movl %ecx,32(%r8)
+ movl %eax,52(%r8)
+ cmp $64,%rdx
+ ja .L_bytes_are_128_or_192
+.L_done:
+ CFI_REMEMBER_STATE();
+ mov %r11,%rax
+ sub %rsp,%rax
+ mov %r11,%rsp
+ CFI_REGISTER(%r11, %rsp)
+ CFI_DEF_CFA_REGISTER(%rsp)
+ pop %rbx
+ CFI_POP(%rbx)
+ ret
+ CFI_RESTORE_STATE();
+.L_bytes_are_128_or_192:
+ sub $64,%rdx
+ add $64,%rdi
+ add $64,%rsi
+ jmp .L_bytes_are_64_128_or_192
+ CFI_ENDPROC();
+ELF(.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;)
+
+#endif /*defined(USE_SALSA20)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/salsa20-armv7-neon.S b/comm/third_party/libgcrypt/cipher/salsa20-armv7-neon.S
new file mode 100644
index 0000000000..3686e3fa6f
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/salsa20-armv7-neon.S
@@ -0,0 +1,899 @@
+/* salsa20-armv7-neon.S - ARM NEON implementation of Salsa20 cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_NEON) && defined(USE_SALSA20)
+
+/*
+ * Based on public domain implementation from SUPERCOP benchmarking framework
+ * by Peter Schwabe and D. J. Bernstein. Paper about the implementation at:
+ * http://cryptojedi.org/papers/#neoncrypto
+ */
+
+.syntax unified
+.arm
+.fpu neon
+.text
+
+.align 2
+.globl _gcry_arm_neon_salsa20_encrypt
+.type _gcry_arm_neon_salsa20_encrypt,%function;
+_gcry_arm_neon_salsa20_encrypt:
+ /* Modifications:
+ * - arguments changed to (void *c, const void *m, unsigned int nblks,
+ * void *ctx, unsigned int rounds) from (void *c, const void *m,
+ * unsigned long long mlen, const void *n, const void *k)
+ * - nonce and key read from 'ctx' as well as sigma and counter.
+ * - read in counter from 'ctx' at the start.
+ * - update counter in 'ctx' at the end.
+ * - length is input as number of blocks, so don't handle tail bytes
+ * (this is done in salsa20.c).
+ */
+ lsl r2,r2,#6
+ vpush {q4,q5,q6,q7}
+ mov r12,sp
+ sub sp,sp,#352
+ and sp,sp,#0xffffffe0
+ strd r4,[sp,#0]
+ strd r6,[sp,#8]
+ strd r8,[sp,#16]
+ strd r10,[sp,#24]
+ str r14,[sp,#224]
+ str r12,[sp,#228]
+ str r0,[sp,#232]
+ str r1,[sp,#236]
+ str r2,[sp,#240]
+ ldr r4,[r12,#64]
+ str r4,[sp,#244]
+ mov r2,r3
+ add r3,r2,#48
+ vld1.8 {q3},[r2]
+ add r0,r2,#32
+ add r14,r2,#40
+ vmov.i64 q3,#0xff
+ str r14,[sp,#160]
+ ldrd r8,[r2,#4]
+ vld1.8 {d0},[r0]
+ ldrd r4,[r2,#20]
+ vld1.8 {d8-d9},[r2]!
+ ldrd r6,[r0,#0]
+ vmov d4,d9
+ ldr r0,[r14]
+ vrev64.i32 d0,d0
+ ldr r1,[r14,#4]
+ vld1.8 {d10-d11},[r2]
+ strd r6,[sp,#32]
+ sub r2,r2,#16
+ strd r0,[sp,#40]
+ vmov d5,d11
+ strd r8,[sp,#48]
+ vext.32 d1,d0,d10,#1
+ strd r4,[sp,#56]
+ ldr r1,[r2,#0]
+ vshr.u32 q3,q3,#7
+ ldr r4,[r2,#12]
+ vext.32 d3,d11,d9,#1
+ ldr r11,[r2,#16]
+ vext.32 d2,d8,d0,#1
+ ldr r8,[r2,#28]
+ vext.32 d0,d10,d8,#1
+ ldr r0,[r3,#0]
+ add r2,r2,#44
+ vmov q4,q3
+ vld1.8 {d6-d7},[r14]
+ vadd.i64 q3,q3,q4
+ ldr r5,[r3,#4]
+ add r12,sp,#256
+ vst1.8 {d4-d5},[r12,: 128]
+ ldr r10,[r3,#8]
+ add r14,sp,#272
+ vst1.8 {d2-d3},[r14,: 128]
+ ldr r9,[r3,#12]
+ vld1.8 {d2-d3},[r3]
+ strd r0,[sp,#64]
+ ldr r0,[sp,#240]
+ strd r4,[sp,#72]
+ strd r10,[sp,#80]
+ strd r8,[sp,#88]
+ nop
+ cmp r0,#192
+ blo .L_mlenlowbelow192
+.L_mlenatleast192:
+ ldrd r2,[sp,#48]
+ vext.32 d7,d6,d6,#1
+ vmov q8,q1
+ ldrd r6,[sp,#32]
+ vld1.8 {d18-d19},[r12,: 128]
+ vmov q10,q0
+ str r0,[sp,#240]
+ vext.32 d4,d7,d19,#1
+ vmov q11,q8
+ vext.32 d10,d18,d7,#1
+ vadd.i64 q3,q3,q4
+ ldrd r0,[sp,#64]
+ vld1.8 {d24-d25},[r14,: 128]
+ vmov d5,d24
+ add r8,sp,#288
+ ldrd r4,[sp,#72]
+ vmov d11,d25
+ add r9,sp,#304
+ ldrd r10,[sp,#80]
+ vst1.8 {d4-d5},[r8,: 128]
+ strd r2,[sp,#96]
+ vext.32 d7,d6,d6,#1
+ vmov q13,q10
+ strd r6,[sp,#104]
+ vmov d13,d24
+ vst1.8 {d10-d11},[r9,: 128]
+ add r2,sp,#320
+ vext.32 d12,d7,d19,#1
+ vmov d15,d25
+ add r6,sp,#336
+ ldr r12,[sp,#244]
+ vext.32 d14,d18,d7,#1
+ vadd.i64 q3,q3,q4
+ ldrd r8,[sp,#88]
+ vst1.8 {d12-d13},[r2,: 128]
+ ldrd r2,[sp,#56]
+ vst1.8 {d14-d15},[r6,: 128]
+ ldrd r6,[sp,#40]
+.L_mainloop2:
+ str r12,[sp,#248]
+ vadd.i32 q4,q10,q8
+ vadd.i32 q9,q13,q11
+ add r12,r0,r2
+ add r14,r5,r1
+ vshl.i32 q12,q4,#7
+ vshl.i32 q14,q9,#7
+ vshr.u32 q4,q4,#25
+ vshr.u32 q9,q9,#25
+ eor r4,r4,r12,ROR #25
+ eor r7,r7,r14,ROR #25
+ add r12,r4,r0
+ add r14,r7,r5
+ veor q5,q5,q12
+ veor q7,q7,q14
+ veor q4,q5,q4
+ veor q5,q7,q9
+ eor r6,r6,r12,ROR #23
+ eor r3,r3,r14,ROR #23
+ add r12,r6,r4
+ str r7,[sp,#116]
+ add r7,r3,r7
+ ldr r14,[sp,#108]
+ vadd.i32 q7,q8,q4
+ vadd.i32 q9,q11,q5
+ vshl.i32 q12,q7,#9
+ vshl.i32 q14,q9,#9
+ vshr.u32 q7,q7,#23
+ vshr.u32 q9,q9,#23
+ veor q2,q2,q12
+ veor q6,q6,q14
+ veor q2,q2,q7
+ veor q6,q6,q9
+ eor r2,r2,r12,ROR #19
+ str r2,[sp,#120]
+ eor r1,r1,r7,ROR #19
+ ldr r7,[sp,#96]
+ add r2,r2,r6
+ str r6,[sp,#112]
+ add r6,r1,r3
+ ldr r12,[sp,#104]
+ vadd.i32 q7,q4,q2
+ vext.32 q4,q4,q4,#3
+ vadd.i32 q9,q5,q6
+ vshl.i32 q12,q7,#13
+ vext.32 q5,q5,q5,#3
+ vshl.i32 q14,q9,#13
+ eor r0,r0,r2,ROR #14
+ eor r2,r5,r6,ROR #14
+ str r3,[sp,#124]
+ add r3,r10,r12
+ ldr r5,[sp,#100]
+ add r6,r9,r11
+ vshr.u32 q7,q7,#19
+ vshr.u32 q9,q9,#19
+ veor q10,q10,q12
+ veor q12,q13,q14
+ eor r8,r8,r3,ROR #25
+ eor r3,r5,r6,ROR #25
+ add r5,r8,r10
+ add r6,r3,r9
+ veor q7,q10,q7
+ veor q9,q12,q9
+ eor r5,r7,r5,ROR #23
+ eor r6,r14,r6,ROR #23
+ add r7,r5,r8
+ add r14,r6,r3
+ vadd.i32 q10,q2,q7
+ vswp d4,d5
+ vadd.i32 q12,q6,q9
+ vshl.i32 q13,q10,#18
+ vswp d12,d13
+ vshl.i32 q14,q12,#18
+ eor r7,r12,r7,ROR #19
+ eor r11,r11,r14,ROR #19
+ add r12,r7,r5
+ add r14,r11,r6
+ vshr.u32 q10,q10,#14
+ vext.32 q7,q7,q7,#1
+ vshr.u32 q12,q12,#14
+ veor q8,q8,q13
+ vext.32 q9,q9,q9,#1
+ veor q11,q11,q14
+ eor r10,r10,r12,ROR #14
+ eor r9,r9,r14,ROR #14
+ add r12,r0,r3
+ add r14,r2,r4
+ veor q8,q8,q10
+ veor q10,q11,q12
+ eor r1,r1,r12,ROR #25
+ eor r7,r7,r14,ROR #25
+ add r12,r1,r0
+ add r14,r7,r2
+ vadd.i32 q11,q4,q8
+ vadd.i32 q12,q5,q10
+ vshl.i32 q13,q11,#7
+ vshl.i32 q14,q12,#7
+ eor r5,r5,r12,ROR #23
+ eor r6,r6,r14,ROR #23
+ vshr.u32 q11,q11,#25
+ vshr.u32 q12,q12,#25
+ add r12,r5,r1
+ add r14,r6,r7
+ veor q7,q7,q13
+ veor q9,q9,q14
+ veor q7,q7,q11
+ veor q9,q9,q12
+ vadd.i32 q11,q8,q7
+ vadd.i32 q12,q10,q9
+ vshl.i32 q13,q11,#9
+ vshl.i32 q14,q12,#9
+ eor r3,r3,r12,ROR #19
+ str r7,[sp,#104]
+ eor r4,r4,r14,ROR #19
+ ldr r7,[sp,#112]
+ add r12,r3,r5
+ str r6,[sp,#108]
+ add r6,r4,r6
+ ldr r14,[sp,#116]
+ eor r0,r0,r12,ROR #14
+ str r5,[sp,#96]
+ eor r5,r2,r6,ROR #14
+ ldr r2,[sp,#120]
+ vshr.u32 q11,q11,#23
+ vshr.u32 q12,q12,#23
+ veor q2,q2,q13
+ veor q6,q6,q14
+ veor q2,q2,q11
+ veor q6,q6,q12
+ add r6,r10,r14
+ add r12,r9,r8
+ vadd.i32 q11,q7,q2
+ vext.32 q7,q7,q7,#3
+ vadd.i32 q12,q9,q6
+ vshl.i32 q13,q11,#13
+ vext.32 q9,q9,q9,#3
+ vshl.i32 q14,q12,#13
+ vshr.u32 q11,q11,#19
+ vshr.u32 q12,q12,#19
+ eor r11,r11,r6,ROR #25
+ eor r2,r2,r12,ROR #25
+ add r6,r11,r10
+ str r3,[sp,#100]
+ add r3,r2,r9
+ ldr r12,[sp,#124]
+ veor q4,q4,q13
+ veor q5,q5,q14
+ veor q4,q4,q11
+ veor q5,q5,q12
+ eor r6,r7,r6,ROR #23
+ eor r3,r12,r3,ROR #23
+ add r7,r6,r11
+ add r12,r3,r2
+ vadd.i32 q11,q2,q4
+ vswp d4,d5
+ vadd.i32 q12,q6,q5
+ vshl.i32 q13,q11,#18
+ vswp d12,d13
+ vshl.i32 q14,q12,#18
+ eor r7,r14,r7,ROR #19
+ eor r8,r8,r12,ROR #19
+ add r12,r7,r6
+ add r14,r8,r3
+ vshr.u32 q11,q11,#14
+ vext.32 q4,q4,q4,#1
+ vshr.u32 q12,q12,#14
+ veor q8,q8,q13
+ vext.32 q5,q5,q5,#1
+ veor q10,q10,q14
+ eor r10,r10,r12,ROR #14
+ veor q8,q8,q11
+ eor r9,r9,r14,ROR #14
+ veor q10,q10,q12
+ vadd.i32 q11,q7,q8
+ vadd.i32 q12,q9,q10
+ add r12,r0,r2
+ add r14,r5,r1
+ vshl.i32 q13,q11,#7
+ vshl.i32 q14,q12,#7
+ vshr.u32 q11,q11,#25
+ vshr.u32 q12,q12,#25
+ eor r4,r4,r12,ROR #25
+ eor r7,r7,r14,ROR #25
+ add r12,r4,r0
+ add r14,r7,r5
+ veor q4,q4,q13
+ veor q5,q5,q14
+ veor q4,q4,q11
+ veor q5,q5,q12
+ eor r6,r6,r12,ROR #23
+ eor r3,r3,r14,ROR #23
+ add r12,r6,r4
+ str r7,[sp,#116]
+ add r7,r3,r7
+ ldr r14,[sp,#108]
+ vadd.i32 q11,q8,q4
+ vadd.i32 q12,q10,q5
+ vshl.i32 q13,q11,#9
+ vshl.i32 q14,q12,#9
+ vshr.u32 q11,q11,#23
+ vshr.u32 q12,q12,#23
+ veor q2,q2,q13
+ veor q6,q6,q14
+ veor q2,q2,q11
+ veor q6,q6,q12
+ eor r2,r2,r12,ROR #19
+ str r2,[sp,#120]
+ eor r1,r1,r7,ROR #19
+ ldr r7,[sp,#96]
+ add r2,r2,r6
+ str r6,[sp,#112]
+ add r6,r1,r3
+ ldr r12,[sp,#104]
+ vadd.i32 q11,q4,q2
+ vext.32 q4,q4,q4,#3
+ vadd.i32 q12,q5,q6
+ vshl.i32 q13,q11,#13
+ vext.32 q5,q5,q5,#3
+ vshl.i32 q14,q12,#13
+ eor r0,r0,r2,ROR #14
+ eor r2,r5,r6,ROR #14
+ str r3,[sp,#124]
+ add r3,r10,r12
+ ldr r5,[sp,#100]
+ add r6,r9,r11
+ vshr.u32 q11,q11,#19
+ vshr.u32 q12,q12,#19
+ veor q7,q7,q13
+ veor q9,q9,q14
+ eor r8,r8,r3,ROR #25
+ eor r3,r5,r6,ROR #25
+ add r5,r8,r10
+ add r6,r3,r9
+ veor q7,q7,q11
+ veor q9,q9,q12
+ eor r5,r7,r5,ROR #23
+ eor r6,r14,r6,ROR #23
+ add r7,r5,r8
+ add r14,r6,r3
+ vadd.i32 q11,q2,q7
+ vswp d4,d5
+ vadd.i32 q12,q6,q9
+ vshl.i32 q13,q11,#18
+ vswp d12,d13
+ vshl.i32 q14,q12,#18
+ eor r7,r12,r7,ROR #19
+ eor r11,r11,r14,ROR #19
+ add r12,r7,r5
+ add r14,r11,r6
+ vshr.u32 q11,q11,#14
+ vext.32 q7,q7,q7,#1
+ vshr.u32 q12,q12,#14
+ veor q8,q8,q13
+ vext.32 q9,q9,q9,#1
+ veor q10,q10,q14
+ eor r10,r10,r12,ROR #14
+ eor r9,r9,r14,ROR #14
+ add r12,r0,r3
+ add r14,r2,r4
+ veor q8,q8,q11
+ veor q11,q10,q12
+ eor r1,r1,r12,ROR #25
+ eor r7,r7,r14,ROR #25
+ add r12,r1,r0
+ add r14,r7,r2
+ vadd.i32 q10,q4,q8
+ vadd.i32 q12,q5,q11
+ vshl.i32 q13,q10,#7
+ vshl.i32 q14,q12,#7
+ eor r5,r5,r12,ROR #23
+ eor r6,r6,r14,ROR #23
+ vshr.u32 q10,q10,#25
+ vshr.u32 q12,q12,#25
+ add r12,r5,r1
+ add r14,r6,r7
+ veor q7,q7,q13
+ veor q9,q9,q14
+ veor q7,q7,q10
+ veor q9,q9,q12
+ vadd.i32 q10,q8,q7
+ vadd.i32 q12,q11,q9
+ vshl.i32 q13,q10,#9
+ vshl.i32 q14,q12,#9
+ eor r3,r3,r12,ROR #19
+ str r7,[sp,#104]
+ eor r4,r4,r14,ROR #19
+ ldr r7,[sp,#112]
+ add r12,r3,r5
+ str r6,[sp,#108]
+ add r6,r4,r6
+ ldr r14,[sp,#116]
+ eor r0,r0,r12,ROR #14
+ str r5,[sp,#96]
+ eor r5,r2,r6,ROR #14
+ ldr r2,[sp,#120]
+ vshr.u32 q10,q10,#23
+ vshr.u32 q12,q12,#23
+ veor q2,q2,q13
+ veor q6,q6,q14
+ veor q2,q2,q10
+ veor q6,q6,q12
+ add r6,r10,r14
+ add r12,r9,r8
+ vadd.i32 q12,q7,q2
+ vext.32 q10,q7,q7,#3
+ vadd.i32 q7,q9,q6
+ vshl.i32 q14,q12,#13
+ vext.32 q13,q9,q9,#3
+ vshl.i32 q9,q7,#13
+ vshr.u32 q12,q12,#19
+ vshr.u32 q7,q7,#19
+ eor r11,r11,r6,ROR #25
+ eor r2,r2,r12,ROR #25
+ add r6,r11,r10
+ str r3,[sp,#100]
+ add r3,r2,r9
+ ldr r12,[sp,#124]
+ veor q4,q4,q14
+ veor q5,q5,q9
+ veor q4,q4,q12
+ veor q7,q5,q7
+ eor r6,r7,r6,ROR #23
+ eor r3,r12,r3,ROR #23
+ add r7,r6,r11
+ add r12,r3,r2
+ vadd.i32 q5,q2,q4
+ vswp d4,d5
+ vadd.i32 q9,q6,q7
+ vshl.i32 q12,q5,#18
+ vswp d12,d13
+ vshl.i32 q14,q9,#18
+ eor r7,r14,r7,ROR #19
+ eor r8,r8,r12,ROR #19
+ add r12,r7,r6
+ add r14,r8,r3
+ vshr.u32 q15,q5,#14
+ vext.32 q5,q4,q4,#1
+ vshr.u32 q4,q9,#14
+ veor q8,q8,q12
+ vext.32 q7,q7,q7,#1
+ veor q9,q11,q14
+ eor r10,r10,r12,ROR #14
+ ldr r12,[sp,#248]
+ veor q8,q8,q15
+ eor r9,r9,r14,ROR #14
+ veor q11,q9,q4
+ subs r12,r12,#4
+ bhi .L_mainloop2
+ strd r8,[sp,#112]
+ ldrd r8,[sp,#64]
+ strd r2,[sp,#120]
+ ldrd r2,[sp,#96]
+ add r0,r0,r8
+ strd r10,[sp,#96]
+ add r1,r1,r9
+ ldrd r10,[sp,#48]
+ ldrd r8,[sp,#72]
+ add r2,r2,r10
+ strd r6,[sp,#128]
+ add r3,r3,r11
+ ldrd r6,[sp,#104]
+ ldrd r10,[sp,#32]
+ ldr r12,[sp,#236]
+ add r4,r4,r8
+ add r5,r5,r9
+ add r6,r6,r10
+ add r7,r7,r11
+ cmp r12,#0
+ beq .L_nomessage1
+ ldr r8,[r12,#0]
+ ldr r9,[r12,#4]
+ ldr r10,[r12,#8]
+ ldr r11,[r12,#12]
+ eor r0,r0,r8
+ ldr r8,[r12,#16]
+ eor r1,r1,r9
+ ldr r9,[r12,#20]
+ eor r2,r2,r10
+ ldr r10,[r12,#24]
+ eor r3,r3,r11
+ ldr r11,[r12,#28]
+ eor r4,r4,r8
+ eor r5,r5,r9
+ eor r6,r6,r10
+ eor r7,r7,r11
+.L_nomessage1:
+ ldr r14,[sp,#232]
+ vadd.i32 q4,q8,q1
+ str r0,[r14,#0]
+ add r0,sp,#304
+ str r1,[r14,#4]
+ vld1.8 {d16-d17},[r0,: 128]
+ str r2,[r14,#8]
+ vadd.i32 q5,q8,q5
+ str r3,[r14,#12]
+ add r0,sp,#288
+ str r4,[r14,#16]
+ vld1.8 {d16-d17},[r0,: 128]
+ str r5,[r14,#20]
+ vadd.i32 q9,q10,q0
+ str r6,[r14,#24]
+ vadd.i32 q2,q8,q2
+ str r7,[r14,#28]
+ vmov.i64 q8,#0xffffffff
+ ldrd r6,[sp,#128]
+ vext.32 d20,d8,d10,#1
+ ldrd r0,[sp,#40]
+ vext.32 d25,d9,d11,#1
+ ldrd r2,[sp,#120]
+ vbif q4,q9,q8
+ ldrd r4,[sp,#56]
+ vext.32 d21,d5,d19,#1
+ add r6,r6,r0
+ vext.32 d24,d4,d18,#1
+ add r7,r7,r1
+ vbif q2,q5,q8
+ add r2,r2,r4
+ vrev64.i32 q5,q10
+ add r3,r3,r5
+ vrev64.i32 q9,q12
+ adds r0,r0,#3
+ vswp d5,d9
+ adc r1,r1,#0
+ strd r0,[sp,#40]
+ ldrd r8,[sp,#112]
+ ldrd r0,[sp,#88]
+ ldrd r10,[sp,#96]
+ ldrd r4,[sp,#80]
+ add r0,r8,r0
+ add r1,r9,r1
+ add r4,r10,r4
+ add r5,r11,r5
+ add r8,r14,#64
+ cmp r12,#0
+ beq .L_nomessage2
+ ldr r9,[r12,#32]
+ ldr r10,[r12,#36]
+ ldr r11,[r12,#40]
+ ldr r14,[r12,#44]
+ eor r6,r6,r9
+ ldr r9,[r12,#48]
+ eor r7,r7,r10
+ ldr r10,[r12,#52]
+ eor r4,r4,r11
+ ldr r11,[r12,#56]
+ eor r5,r5,r14
+ ldr r14,[r12,#60]
+ add r12,r12,#64
+ eor r2,r2,r9
+ vld1.8 {d20-d21},[r12]!
+ veor q4,q4,q10
+ eor r3,r3,r10
+ vld1.8 {d20-d21},[r12]!
+ veor q5,q5,q10
+ eor r0,r0,r11
+ vld1.8 {d20-d21},[r12]!
+ veor q2,q2,q10
+ eor r1,r1,r14
+ vld1.8 {d20-d21},[r12]!
+ veor q9,q9,q10
+.L_nomessage2:
+ vst1.8 {d8-d9},[r8]!
+ vst1.8 {d10-d11},[r8]!
+ vmov.i64 q4,#0xff
+ vst1.8 {d4-d5},[r8]!
+ vst1.8 {d18-d19},[r8]!
+ str r6,[r8,#-96]
+ add r6,sp,#336
+ str r7,[r8,#-92]
+ add r7,sp,#320
+ str r4,[r8,#-88]
+ vadd.i32 q2,q11,q1
+ vld1.8 {d10-d11},[r6,: 128]
+ vadd.i32 q5,q5,q7
+ vld1.8 {d14-d15},[r7,: 128]
+ vadd.i32 q9,q13,q0
+ vadd.i32 q6,q7,q6
+ str r5,[r8,#-84]
+ vext.32 d14,d4,d10,#1
+ str r2,[r8,#-80]
+ vext.32 d21,d5,d11,#1
+ str r3,[r8,#-76]
+ vbif q2,q9,q8
+ str r0,[r8,#-72]
+ vext.32 d15,d13,d19,#1
+ vshr.u32 q4,q4,#7
+ str r1,[r8,#-68]
+ vext.32 d20,d12,d18,#1
+ vbif q6,q5,q8
+ ldr r0,[sp,#240]
+ vrev64.i32 q5,q7
+ vrev64.i32 q7,q10
+ vswp d13,d5
+ vadd.i64 q3,q3,q4
+ sub r0,r0,#192
+ cmp r12,#0
+ beq .L_nomessage21
+ vld1.8 {d16-d17},[r12]!
+ veor q2,q2,q8
+ vld1.8 {d16-d17},[r12]!
+ veor q5,q5,q8
+ vld1.8 {d16-d17},[r12]!
+ veor q6,q6,q8
+ vld1.8 {d16-d17},[r12]!
+ veor q7,q7,q8
+.L_nomessage21:
+ vst1.8 {d4-d5},[r8]!
+ vst1.8 {d10-d11},[r8]!
+ vst1.8 {d12-d13},[r8]!
+ vst1.8 {d14-d15},[r8]!
+ str r12,[sp,#236]
+ add r14,sp,#272
+ add r12,sp,#256
+ str r8,[sp,#232]
+ cmp r0,#192
+ bhs .L_mlenatleast192
+.L_mlenlowbelow192:
+ cmp r0,#0
+ beq .L_done
+ b .L_mlenatleast1
+.L_nextblock:
+ sub r0,r0,#64
+.L_mlenatleast1:
+.L_handleblock:
+ str r0,[sp,#248]
+ ldrd r2,[sp,#48]
+ ldrd r6,[sp,#32]
+ ldrd r0,[sp,#64]
+ ldrd r4,[sp,#72]
+ ldrd r10,[sp,#80]
+ ldrd r8,[sp,#88]
+ strd r2,[sp,#96]
+ strd r6,[sp,#104]
+ ldrd r2,[sp,#56]
+ ldrd r6,[sp,#40]
+ ldr r12,[sp,#244]
+.L_mainloop1:
+ str r12,[sp,#252]
+ add r12,r0,r2
+ add r14,r5,r1
+ eor r4,r4,r12,ROR #25
+ eor r7,r7,r14,ROR #25
+ add r12,r4,r0
+ add r14,r7,r5
+ eor r6,r6,r12,ROR #23
+ eor r3,r3,r14,ROR #23
+ add r12,r6,r4
+ str r7,[sp,#132]
+ add r7,r3,r7
+ ldr r14,[sp,#104]
+ eor r2,r2,r12,ROR #19
+ str r6,[sp,#128]
+ eor r1,r1,r7,ROR #19
+ ldr r7,[sp,#100]
+ add r6,r2,r6
+ str r2,[sp,#120]
+ add r2,r1,r3
+ ldr r12,[sp,#96]
+ eor r0,r0,r6,ROR #14
+ str r3,[sp,#124]
+ eor r2,r5,r2,ROR #14
+ ldr r3,[sp,#108]
+ add r5,r10,r14
+ add r6,r9,r11
+ eor r8,r8,r5,ROR #25
+ eor r5,r7,r6,ROR #25
+ add r6,r8,r10
+ add r7,r5,r9
+ eor r6,r12,r6,ROR #23
+ eor r3,r3,r7,ROR #23
+ add r7,r6,r8
+ add r12,r3,r5
+ eor r7,r14,r7,ROR #19
+ eor r11,r11,r12,ROR #19
+ add r12,r7,r6
+ add r14,r11,r3
+ eor r10,r10,r12,ROR #14
+ eor r9,r9,r14,ROR #14
+ add r12,r0,r5
+ add r14,r2,r4
+ eor r1,r1,r12,ROR #25
+ eor r7,r7,r14,ROR #25
+ add r12,r1,r0
+ add r14,r7,r2
+ eor r6,r6,r12,ROR #23
+ eor r3,r3,r14,ROR #23
+ add r12,r6,r1
+ str r7,[sp,#104]
+ add r7,r3,r7
+ ldr r14,[sp,#128]
+ eor r5,r5,r12,ROR #19
+ str r3,[sp,#108]
+ eor r4,r4,r7,ROR #19
+ ldr r7,[sp,#132]
+ add r12,r5,r6
+ str r6,[sp,#96]
+ add r3,r4,r3
+ ldr r6,[sp,#120]
+ eor r0,r0,r12,ROR #14
+ str r5,[sp,#100]
+ eor r5,r2,r3,ROR #14
+ ldr r3,[sp,#124]
+ add r2,r10,r7
+ add r12,r9,r8
+ eor r11,r11,r2,ROR #25
+ eor r2,r6,r12,ROR #25
+ add r6,r11,r10
+ add r12,r2,r9
+ eor r6,r14,r6,ROR #23
+ eor r3,r3,r12,ROR #23
+ add r12,r6,r11
+ add r14,r3,r2
+ eor r7,r7,r12,ROR #19
+ eor r8,r8,r14,ROR #19
+ add r12,r7,r6
+ add r14,r8,r3
+ eor r10,r10,r12,ROR #14
+ eor r9,r9,r14,ROR #14
+ ldr r12,[sp,#252]
+ subs r12,r12,#2
+ bhi .L_mainloop1
+ strd r6,[sp,#128]
+ strd r2,[sp,#120]
+ strd r10,[sp,#112]
+ strd r8,[sp,#136]
+ ldrd r2,[sp,#96]
+ ldrd r6,[sp,#104]
+ ldrd r8,[sp,#64]
+ ldrd r10,[sp,#48]
+ add r0,r0,r8
+ add r1,r1,r9
+ add r2,r2,r10
+ add r3,r3,r11
+ ldrd r8,[sp,#72]
+ ldrd r10,[sp,#32]
+ add r4,r4,r8
+ add r5,r5,r9
+ add r6,r6,r10
+ add r7,r7,r11
+ ldr r12,[sp,#236]
+ cmp r12,#0
+ beq .L_nomessage10
+ ldr r8,[r12,#0]
+ ldr r9,[r12,#4]
+ ldr r10,[r12,#8]
+ ldr r11,[r12,#12]
+ eor r0,r0,r8
+ ldr r8,[r12,#16]
+ eor r1,r1,r9
+ ldr r9,[r12,#20]
+ eor r2,r2,r10
+ ldr r10,[r12,#24]
+ eor r3,r3,r11
+ ldr r11,[r12,#28]
+ eor r4,r4,r8
+ eor r5,r5,r9
+ eor r6,r6,r10
+ eor r7,r7,r11
+.L_nomessage10:
+ ldr r14,[sp,#232]
+ str r0,[r14,#0]
+ str r1,[r14,#4]
+ str r2,[r14,#8]
+ str r3,[r14,#12]
+ str r4,[r14,#16]
+ str r5,[r14,#20]
+ str r6,[r14,#24]
+ str r7,[r14,#28]
+ ldrd r6,[sp,#128]
+ ldrd r10,[sp,#112]
+ ldrd r0,[sp,#40]
+ ldrd r4,[sp,#80]
+ add r6,r6,r0
+ add r7,r7,r1
+ add r10,r10,r4
+ add r11,r11,r5
+ adds r0,r0,#1
+ adc r1,r1,#0
+ strd r0,[sp,#40]
+ ldrd r2,[sp,#120]
+ ldrd r8,[sp,#136]
+ ldrd r4,[sp,#56]
+ ldrd r0,[sp,#88]
+ add r2,r2,r4
+ add r3,r3,r5
+ add r0,r8,r0
+ add r1,r9,r1
+ cmp r12,#0
+ beq .L_nomessage11
+ ldr r4,[r12,#32]
+ ldr r5,[r12,#36]
+ ldr r8,[r12,#40]
+ ldr r9,[r12,#44]
+ eor r6,r6,r4
+ ldr r4,[r12,#48]
+ eor r7,r7,r5
+ ldr r5,[r12,#52]
+ eor r10,r10,r8
+ ldr r8,[r12,#56]
+ eor r11,r11,r9
+ ldr r9,[r12,#60]
+ eor r2,r2,r4
+ eor r3,r3,r5
+ eor r0,r0,r8
+ eor r1,r1,r9
+ add r4,r12,#64
+ str r4,[sp,#236]
+.L_nomessage11:
+ str r6,[r14,#32]
+ str r7,[r14,#36]
+ str r10,[r14,#40]
+ str r11,[r14,#44]
+ str r2,[r14,#48]
+ str r3,[r14,#52]
+ str r0,[r14,#56]
+ str r1,[r14,#60]
+ add r0,r14,#64
+ str r0,[sp,#232]
+ ldr r0,[sp,#248]
+ cmp r0,#64
+ bhi .L_nextblock
+.L_done:
+ ldr r2,[sp,#160]
+ ldrd r4,[sp,#0]
+ ldrd r6,[sp,#8]
+ ldrd r8,[sp,#16]
+ ldrd r10,[sp,#24]
+ ldr r12,[sp,#228]
+ ldr r14,[sp,#224]
+ ldrd r0,[sp,#40]
+ strd r0,[r2]
+ sub r0,r12,sp
+ mov sp,r12
+ vpop {q4,q5,q6,q7}
+ add r0,r0,#64
+ bx lr
+.size _gcry_arm_neon_salsa20_encrypt,.-_gcry_arm_neon_salsa20_encrypt;
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/salsa20.c b/comm/third_party/libgcrypt/cipher/salsa20.c
new file mode 100644
index 0000000000..d8c5c81f30
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/salsa20.c
@@ -0,0 +1,600 @@
+/* salsa20.c - Bernstein's Salsa20 cipher
+ * Copyright (C) 2012 Simon Josefsson, Niels Möller
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * For a description of the algorithm, see:
+ * http://cr.yp.to/snuffle/spec.pdf
+ * http://cr.yp.to/snuffle/design.pdf
+ */
+
+/* The code is based on the code in Nettle
+ (git commit id 9d2d8ddaee35b91a4e1a32ae77cba04bea3480e7)
+ which in turn is based on
+ salsa20-ref.c version 20051118
+ D. J. Bernstein
+ Public domain.
+*/
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+
+
+/* USE_AMD64 indicates whether to compile with AMD64 code. */
+#undef USE_AMD64
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64 1
+#endif
+
+/* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code. */
+#undef USE_ARM_NEON_ASM
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_NEON)
+# define USE_ARM_NEON_ASM 1
+# endif
+#endif /*ENABLE_NEON_SUPPORT*/
+
+
+#define SALSA20_MIN_KEY_SIZE 16 /* Bytes. */
+#define SALSA20_MAX_KEY_SIZE 32 /* Bytes. */
+#define SALSA20_BLOCK_SIZE 64 /* Bytes. */
+#define SALSA20_IV_SIZE 8 /* Bytes. */
+#define SALSA20_INPUT_LENGTH 16 /* 32-bit words. */
+
+/* Number of rounds. The standard uses 20 rounds. In any case the
+ number of rounds must be even. */
+#define SALSA20_ROUNDS 20
+#define SALSA20R12_ROUNDS 12
+
+
+struct SALSA20_context_s;
+
+typedef unsigned int (*salsa20_core_t) (u32 *dst, struct SALSA20_context_s *ctx,
+ unsigned int rounds);
+typedef void (* salsa20_keysetup_t)(struct SALSA20_context_s *ctx,
+ const byte *key, int keylen);
+typedef void (* salsa20_ivsetup_t)(struct SALSA20_context_s *ctx,
+ const byte *iv);
+
+typedef struct SALSA20_context_s
+{
+ /* Indices 1-4 and 11-14 hold the key (two identical copies for the
+ shorter key size), indices 0, 5, 10, 15 are constant, indices 6, 7
+ are the IV, and indices 8, 9 are the block counter:
+
+ C K K K
+ K C I I
+ B B C K
+ K K K C
+ */
+ u32 input[SALSA20_INPUT_LENGTH];
+ u32 pad[SALSA20_INPUT_LENGTH];
+ unsigned int unused; /* bytes in the pad. */
+#ifdef USE_ARM_NEON_ASM
+ int use_neon;
+#endif
+ salsa20_keysetup_t keysetup;
+ salsa20_ivsetup_t ivsetup;
+ salsa20_core_t core;
+} SALSA20_context_t;
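+
+/* For illustration (names SIGMA0..SIGMA3, K0..K7, IV0/IV1, CTR0/CTR1 are
+ only used in this comment): the generic salsa20_keysetup and
+ salsa20_ivsetup below fill the state for a 256-bit key as
+
+ input[ 0.. 3] = SIGMA0 K0 K1 K2
+ input[ 4.. 7] = K3 SIGMA1 IV0 IV1
+ input[ 8..11] = CTR0 CTR1 SIGMA2 K4
+ input[12..15] = K5 K6 K7 SIGMA3
+
+ matching the C/K/I/B diagram above. The NEON code uses its own layout
+ (see salsa20_keysetup_neon). */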
+
+
+/* The masking of the right shift count is needed to allow n == 0 (using
+ just 32 - n results in undefined behaviour). Most uses of this
+ macro use a constant and non-zero rotation count. */
+#define ROTL32(n,x) (((x)<<(n)) | ((x)>>((-(n)&31))))
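+/* Example: for n == 0 the unmasked form would evaluate (x) >> 32, which
+ is undefined for a 32-bit operand, whereas (-(0) & 31) == 0 gives the
+ well-defined (x) >> 0. */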
+
+
+#define LE_SWAP32(v) le_bswap32(v)
+
+#define LE_READ_UINT32(p) buf_get_le32(p)
+
+
+static void salsa20_setiv (void *context, const byte *iv, size_t ivlen);
+static const char *selftest (void);
+
+
+#ifdef USE_AMD64
+
+/* The assembly implementations use the SysV ABI; on Win64 an ABI conversion
+ * and additional stack space to store XMM6-XMM15 are needed. */
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
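+
+/* ASM_EXTRA_STACK accounts for the 10 XMM registers (XMM6..XMM15) of 16
+ bytes each that the Win64 ABI wrapper has to spill; it is added to the
+ burn-stack estimates returned by the assembly routines. */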
+
+/* AMD64 assembly implementations of Salsa20. */
+void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits)
+ ASM_FUNC_ABI;
+void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv)
+ ASM_FUNC_ABI;
+unsigned int
+_gcry_salsa20_amd64_encrypt_blocks(u32 *ctxinput, const void *src, void *dst,
+ size_t len, int rounds) ASM_FUNC_ABI;
+
+static void
+salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen)
+{
+ _gcry_salsa20_amd64_keysetup(ctx->input, key, keylen * 8);
+}
+
+static void
+salsa20_ivsetup(SALSA20_context_t *ctx, const byte *iv)
+{
+ _gcry_salsa20_amd64_ivsetup(ctx->input, iv);
+}
+
+static unsigned int
+salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds)
+{
+ memset(dst, 0, SALSA20_BLOCK_SIZE);
+ return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds)
+ + ASM_EXTRA_STACK;
+}
+
+#else /* USE_AMD64 */
+
+
+
+#if 0
+# define SALSA20_CORE_DEBUG(i) do { \
+ unsigned debug_j; \
+ for (debug_j = 0; debug_j < 16; debug_j++) \
+ { \
+ if (debug_j == 0) \
+ fprintf(stderr, "%2d:", (i)); \
+ else if (debug_j % 4 == 0) \
+ fprintf(stderr, "\n "); \
+ fprintf(stderr, " %8x", pad[debug_j]); \
+ } \
+ fprintf(stderr, "\n"); \
+ } while (0)
+#else
+# define SALSA20_CORE_DEBUG(i)
+#endif
+
+#define QROUND(x0, x1, x2, x3) \
+ do { \
+ x1 ^= ROTL32 ( 7, x0 + x3); \
+ x2 ^= ROTL32 ( 9, x1 + x0); \
+ x3 ^= ROTL32 (13, x2 + x1); \
+ x0 ^= ROTL32 (18, x3 + x2); \
+ } while(0)
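+
+/* In salsa20_core below, the first four QROUNDs of each loop iteration
+ operate on the columns of the 4x4 state and the next four on its rows,
+ so one iteration is a Salsa20 double round (hence the step of 2). */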
+
+static unsigned int
+salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned rounds)
+{
+ u32 pad[SALSA20_INPUT_LENGTH], *src = ctx->input;
+ unsigned int i;
+
+ memcpy (pad, src, sizeof(pad));
+ for (i = 0; i < rounds; i += 2)
+ {
+ SALSA20_CORE_DEBUG (i);
+ QROUND (pad[0], pad[4], pad[8], pad[12]);
+ QROUND (pad[5], pad[9], pad[13], pad[1] );
+ QROUND (pad[10], pad[14], pad[2], pad[6] );
+ QROUND (pad[15], pad[3], pad[7], pad[11]);
+
+ SALSA20_CORE_DEBUG (i+1);
+ QROUND (pad[0], pad[1], pad[2], pad[3] );
+ QROUND (pad[5], pad[6], pad[7], pad[4] );
+ QROUND (pad[10], pad[11], pad[8], pad[9] );
+ QROUND (pad[15], pad[12], pad[13], pad[14]);
+ }
+ SALSA20_CORE_DEBUG (i);
+
+ for (i = 0; i < SALSA20_INPUT_LENGTH; i++)
+ {
+ u32 t = pad[i] + src[i];
+ dst[i] = LE_SWAP32 (t);
+ }
+
+ /* Update counter. */
+ if (!++src[8])
+ src[9]++;
+
+ /* burn_stack */
+ return ( 3*sizeof (void*) \
+ + 2*sizeof (void*) \
+ + 64 \
+ + sizeof (unsigned int) \
+ + sizeof (u32) );
+}
+#undef QROUND
+#undef SALSA20_CORE_DEBUG
+
+static void
+salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen)
+{
+ /* These constants are the little endian encoding of the string
+ "expand 32-byte k". For the 128 bit variant, the "32" in that
+ string will be fixed up to "16". */
+ ctx->input[0] = 0x61707865; /* "apxe" */
+ ctx->input[5] = 0x3320646e; /* "3 dn" */
+ ctx->input[10] = 0x79622d32; /* "yb-2" */
+ ctx->input[15] = 0x6b206574; /* "k et" */
+
+ ctx->input[1] = LE_READ_UINT32(key + 0);
+ ctx->input[2] = LE_READ_UINT32(key + 4);
+ ctx->input[3] = LE_READ_UINT32(key + 8);
+ ctx->input[4] = LE_READ_UINT32(key + 12);
+ if (keylen == SALSA20_MAX_KEY_SIZE) /* 256 bits */
+ {
+ ctx->input[11] = LE_READ_UINT32(key + 16);
+ ctx->input[12] = LE_READ_UINT32(key + 20);
+ ctx->input[13] = LE_READ_UINT32(key + 24);
+ ctx->input[14] = LE_READ_UINT32(key + 28);
+ }
+ else /* 128 bits */
+ {
+ ctx->input[11] = ctx->input[1];
+ ctx->input[12] = ctx->input[2];
+ ctx->input[13] = ctx->input[3];
+ ctx->input[14] = ctx->input[4];
+
+ ctx->input[5] -= 0x02000000; /* Change to "1 dn". */
+ ctx->input[10] += 0x00000004; /* Change to "yb-6". */
+ }
+}
+
+static void salsa20_ivsetup(SALSA20_context_t *ctx, const byte *iv)
+{
+ ctx->input[6] = LE_READ_UINT32(iv + 0);
+ ctx->input[7] = LE_READ_UINT32(iv + 4);
+ /* Reset the block counter. */
+ ctx->input[8] = 0;
+ ctx->input[9] = 0;
+}
+
+#endif /*!USE_AMD64*/
+
+#ifdef USE_ARM_NEON_ASM
+
+/* ARM NEON implementation of Salsa20. */
+unsigned int
+_gcry_arm_neon_salsa20_encrypt(void *c, const void *m, unsigned int nblks,
+ void *k, unsigned int rounds);
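+
+/* The NEON code expects a different state layout than the one described
+ in the SALSA20_context_s comment above: key words in input[0..7], IV in
+ input[8..9], block counter in input[10..11] and the sigma constants in
+ input[12..15]; hence the dedicated keysetup/ivsetup below. */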
+
+static unsigned int
+salsa20_core_neon (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds)
+{
+ return _gcry_arm_neon_salsa20_encrypt(dst, NULL, 1, ctx->input, rounds);
+}
+
+static void salsa20_ivsetup_neon(SALSA20_context_t *ctx, const byte *iv)
+{
+ memcpy(ctx->input + 8, iv, 8);
+ /* Reset the block counter. */
+ memset(ctx->input + 10, 0, 8);
+}
+
+static void
+salsa20_keysetup_neon(SALSA20_context_t *ctx, const byte *key, int klen)
+{
+ static const unsigned char sigma32[16] = "expand 32-byte k";
+ static const unsigned char sigma16[16] = "expand 16-byte k";
+
+ if (klen == 16)
+ {
+ memcpy (ctx->input, key, 16);
+ memcpy (ctx->input + 4, key, 16); /* Duplicate 128-bit key. */
+ memcpy (ctx->input + 12, sigma16, 16);
+ }
+ else
+ {
+ /* 32-byte key */
+ memcpy (ctx->input, key, 32);
+ memcpy (ctx->input + 12, sigma32, 16);
+ }
+}
+
+#endif /*USE_ARM_NEON_ASM*/
+
+
+static gcry_err_code_t
+salsa20_do_setkey (SALSA20_context_t *ctx,
+ const byte *key, unsigned int keylen)
+{
+ static int initialized;
+ static const char *selftest_failed;
+
+ if (!initialized )
+ {
+ initialized = 1;
+ selftest_failed = selftest ();
+ if (selftest_failed)
+ log_error ("SALSA20 selftest failed (%s)\n", selftest_failed );
+ }
+ if (selftest_failed)
+ return GPG_ERR_SELFTEST_FAILED;
+
+ if (keylen != SALSA20_MIN_KEY_SIZE
+ && keylen != SALSA20_MAX_KEY_SIZE)
+ return GPG_ERR_INV_KEYLEN;
+
+ /* Default ops. */
+ ctx->keysetup = salsa20_keysetup;
+ ctx->ivsetup = salsa20_ivsetup;
+ ctx->core = salsa20_core;
+
+#ifdef USE_ARM_NEON_ASM
+ ctx->use_neon = (_gcry_get_hw_features () & HWF_ARM_NEON) != 0;
+ if (ctx->use_neon)
+ {
+ /* Use ARM NEON ops instead. */
+ ctx->keysetup = salsa20_keysetup_neon;
+ ctx->ivsetup = salsa20_ivsetup_neon;
+ ctx->core = salsa20_core_neon;
+ }
+#endif
+
+ ctx->keysetup (ctx, key, keylen);
+
+ /* We default to a zero nonce. */
+ salsa20_setiv (ctx, NULL, 0);
+
+ return 0;
+}
+
+
+static gcry_err_code_t
+salsa20_setkey (void *context, const byte *key, unsigned int keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ SALSA20_context_t *ctx = (SALSA20_context_t *)context;
+ gcry_err_code_t rc = salsa20_do_setkey (ctx, key, keylen);
+ (void)bulk_ops;
+ _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *));
+ return rc;
+}
+
+
+static void
+salsa20_setiv (void *context, const byte *iv, size_t ivlen)
+{
+ SALSA20_context_t *ctx = (SALSA20_context_t *)context;
+ byte tmp[SALSA20_IV_SIZE];
+
+ if (iv && ivlen != SALSA20_IV_SIZE)
+ log_info ("WARNING: salsa20_setiv: bad ivlen=%u\n", (u32)ivlen);
+
+ if (!iv || ivlen != SALSA20_IV_SIZE)
+ memset (tmp, 0, sizeof(tmp));
+ else
+ memcpy (tmp, iv, SALSA20_IV_SIZE);
+
+ ctx->ivsetup (ctx, tmp);
+
+ /* Reset the unused pad bytes counter. */
+ ctx->unused = 0;
+
+ wipememory (tmp, sizeof(tmp));
+}
+
+
+
+/* Note: This function requires LENGTH > 0. */
+static void
+salsa20_do_encrypt_stream (SALSA20_context_t *ctx,
+ byte *outbuf, const byte *inbuf,
+ size_t length, unsigned rounds)
+{
+ unsigned int nburn, burn = 0;
+
+ if (ctx->unused)
+ {
+ unsigned char *p = (void*)ctx->pad;
+ size_t n;
+
+ gcry_assert (ctx->unused < SALSA20_BLOCK_SIZE);
+
+ n = ctx->unused;
+ if (n > length)
+ n = length;
+ buf_xor (outbuf, inbuf, p + SALSA20_BLOCK_SIZE - ctx->unused, n);
+ length -= n;
+ outbuf += n;
+ inbuf += n;
+ ctx->unused -= n;
+ if (!length)
+ return;
+ gcry_assert (!ctx->unused);
+ }
+
+#ifdef USE_AMD64
+ if (length >= SALSA20_BLOCK_SIZE)
+ {
+ size_t nblocks = length / SALSA20_BLOCK_SIZE;
+ burn = _gcry_salsa20_amd64_encrypt_blocks(ctx->input, inbuf, outbuf,
+ nblocks, rounds);
+ burn += ASM_EXTRA_STACK;
+ length -= SALSA20_BLOCK_SIZE * nblocks;
+ outbuf += SALSA20_BLOCK_SIZE * nblocks;
+ inbuf += SALSA20_BLOCK_SIZE * nblocks;
+ }
+#endif
+
+#ifdef USE_ARM_NEON_ASM
+ if (ctx->use_neon && length >= SALSA20_BLOCK_SIZE)
+ {
+ unsigned int nblocks = length / SALSA20_BLOCK_SIZE;
+ _gcry_arm_neon_salsa20_encrypt (outbuf, inbuf, nblocks, ctx->input,
+ rounds);
+ length -= SALSA20_BLOCK_SIZE * nblocks;
+ outbuf += SALSA20_BLOCK_SIZE * nblocks;
+ inbuf += SALSA20_BLOCK_SIZE * nblocks;
+ }
+#endif
+
+ while (length > 0)
+ {
+ /* Create the next pad and bump the block counter. Note that it
+ is the user's duty to change to another nonce not later than
+ after 2^70 processed bytes (the 64-bit block counter times the
+ 64-byte block size). */
+ nburn = ctx->core (ctx->pad, ctx, rounds);
+ burn = nburn > burn ? nburn : burn;
+
+ if (length <= SALSA20_BLOCK_SIZE)
+ {
+ buf_xor (outbuf, inbuf, ctx->pad, length);
+ ctx->unused = SALSA20_BLOCK_SIZE - length;
+ break;
+ }
+ buf_xor (outbuf, inbuf, ctx->pad, SALSA20_BLOCK_SIZE);
+ length -= SALSA20_BLOCK_SIZE;
+ outbuf += SALSA20_BLOCK_SIZE;
+ inbuf += SALSA20_BLOCK_SIZE;
+ }
+
+ _gcry_burn_stack (burn);
+}
+
+
+static void
+salsa20_encrypt_stream (void *context,
+ byte *outbuf, const byte *inbuf, size_t length)
+{
+ SALSA20_context_t *ctx = (SALSA20_context_t *)context;
+
+ if (length)
+ salsa20_do_encrypt_stream (ctx, outbuf, inbuf, length, SALSA20_ROUNDS);
+}
+
+
+static void
+salsa20r12_encrypt_stream (void *context,
+ byte *outbuf, const byte *inbuf, size_t length)
+{
+ SALSA20_context_t *ctx = (SALSA20_context_t *)context;
+
+ if (length)
+ salsa20_do_encrypt_stream (ctx, outbuf, inbuf, length, SALSA20R12_ROUNDS);
+}
+
+
+static const char*
+selftest (void)
+{
+ byte ctxbuf[sizeof(SALSA20_context_t) + 15];
+ SALSA20_context_t *ctx;
+ byte scratch[8+1];
+ byte buf[256+64+4];
+ int i;
+
+ static byte key_1[] =
+ { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+ static const byte nonce_1[] =
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+ static const byte plaintext_1[] =
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+ static const byte ciphertext_1[] =
+ { 0xE3, 0xBE, 0x8F, 0xDD, 0x8B, 0xEC, 0xA2, 0xE3};
+
+ /* 16-byte alignment required for amd64 implementation. */
+ ctx = (SALSA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15);
+
+ salsa20_setkey (ctx, key_1, sizeof key_1, NULL);
+ salsa20_setiv (ctx, nonce_1, sizeof nonce_1);
+ scratch[8] = 0;
+ salsa20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1);
+ if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1))
+ return "Salsa20 encryption test 1 failed.";
+ if (scratch[8])
+ return "Salsa20 wrote too much.";
+ salsa20_setkey (ctx, key_1, sizeof key_1, NULL);
+ salsa20_setiv (ctx, nonce_1, sizeof nonce_1);
+ salsa20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1);
+ if (memcmp (scratch, plaintext_1, sizeof plaintext_1))
+ return "Salsa20 decryption test 1 failed.";
+
+ for (i = 0; i < sizeof buf; i++)
+ buf[i] = i;
+ salsa20_setkey (ctx, key_1, sizeof key_1, NULL);
+ salsa20_setiv (ctx, nonce_1, sizeof nonce_1);
+ /*encrypt*/
+ salsa20_encrypt_stream (ctx, buf, buf, sizeof buf);
+ /*decrypt*/
+ salsa20_setkey (ctx, key_1, sizeof key_1, NULL);
+ salsa20_setiv (ctx, nonce_1, sizeof nonce_1);
+ salsa20_encrypt_stream (ctx, buf, buf, 1);
+ salsa20_encrypt_stream (ctx, buf+1, buf+1, (sizeof buf)-1-1);
+ salsa20_encrypt_stream (ctx, buf+(sizeof buf)-1, buf+(sizeof buf)-1, 1);
+ for (i = 0; i < sizeof buf; i++)
+ if (buf[i] != (byte)i)
+ return "Salsa20 encryption test 2 failed.";
+
+ return NULL;
+}
+
+
+gcry_cipher_spec_t _gcry_cipher_spec_salsa20 =
+ {
+ GCRY_CIPHER_SALSA20,
+ {0, 0}, /* flags */
+ "SALSA20", /* name */
+ NULL, /* aliases */
+ NULL, /* oids */
+ 1, /* blocksize in bytes. */
+ SALSA20_MAX_KEY_SIZE*8, /* standard key length in bits. */
+ sizeof (SALSA20_context_t),
+ salsa20_setkey,
+ NULL,
+ NULL,
+ salsa20_encrypt_stream,
+ salsa20_encrypt_stream,
+ NULL,
+ NULL,
+ salsa20_setiv
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_salsa20r12 =
+ {
+ GCRY_CIPHER_SALSA20R12,
+ {0, 0}, /* flags */
+ "SALSA20R12", /* name */
+ NULL, /* aliases */
+ NULL, /* oids */
+ 1, /* blocksize in bytes. */
+ SALSA20_MAX_KEY_SIZE*8, /* standard key length in bits. */
+ sizeof (SALSA20_context_t),
+ salsa20_setkey,
+ NULL,
+ NULL,
+ salsa20r12_encrypt_stream,
+ salsa20r12_encrypt_stream,
+ NULL,
+ NULL,
+ salsa20_setiv
+ };
diff --git a/comm/third_party/libgcrypt/cipher/scrypt.c b/comm/third_party/libgcrypt/cipher/scrypt.c
new file mode 100644
index 0000000000..13fd1cf06c
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/scrypt.c
@@ -0,0 +1,322 @@
+/* scrypt.c - Scrypt password-based key derivation function.
+ * Copyright (C) 2012 Simon Josefsson
+ * Copyright (C) 2013 Christian Grothoff
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Adapted from Nettle, the low-level cryptographic library, for
+ * libgcrypt by Christian Grothoff; original license:
+ *
+ * Copyright (C) 2012 Simon Josefsson
+ *
+ * The nettle library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at your
+ * option) any later version.
+ *
+ * The nettle library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the nettle library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02111-1301, USA.
+ */
+
+#include <config.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "kdf-internal.h"
+#include "bufhelp.h"
+
+/* We really need a 64 bit type for this code. */
+#define SALSA20_INPUT_LENGTH 16
+
+#define ROTL32(n,x) (((x)<<(n)) | ((x)>>(32-(n))))
+
+
+/* Read a 64-bit integer in network (big-endian) byte order. */
+#define READ_UINT64(p) buf_get_be64(p)
+
+
+/* And the same in little-endian byte order. */
+#define LE_READ_UINT64(p) buf_get_le64(p)
+
+#define LE_SWAP32(v) le_bswap32(v)
+
+
+#define QROUND(x0, x1, x2, x3) do { \
+ x1 ^= ROTL32(7, x0 + x3); \
+ x2 ^= ROTL32(9, x1 + x0); \
+ x3 ^= ROTL32(13, x2 + x1); \
+ x0 ^= ROTL32(18, x3 + x2); \
+ } while(0)
+
+
+static void
+salsa20_core (u32 *dst, const u32 *src, unsigned int rounds)
+{
+ u32 x[SALSA20_INPUT_LENGTH];
+ unsigned i;
+
+ assert ( (rounds & 1) == 0);
+
+ for (i = 0; i < SALSA20_INPUT_LENGTH; i++)
+ x[i] = LE_SWAP32(src[i]);
+
+ for (i = 0; i < rounds;i += 2)
+ {
+ QROUND(x[0], x[4], x[8], x[12]);
+ QROUND(x[5], x[9], x[13], x[1]);
+ QROUND(x[10], x[14], x[2], x[6]);
+ QROUND(x[15], x[3], x[7], x[11]);
+
+ QROUND(x[0], x[1], x[2], x[3]);
+ QROUND(x[5], x[6], x[7], x[4]);
+ QROUND(x[10], x[11], x[8], x[9]);
+ QROUND(x[15], x[12], x[13], x[14]);
+ }
+
+ for (i = 0; i < SALSA20_INPUT_LENGTH; i++)
+ {
+ u32 t = x[i] + LE_SWAP32(src[i]);
+ dst[i] = LE_SWAP32(t);
+ }
+}
+
+
+static void
+scrypt_block_mix (u32 r, unsigned char *B, unsigned char *tmp2)
+{
+ u64 i;
+ unsigned char *X = tmp2;
+ unsigned char *Y = tmp2 + 64;
+
+#if 0
+ if (r == 1)
+ {
+ for (i = 0; i < 2 * r; i++)
+ {
+ size_t j;
+ printf ("B[%d] = ", (int)i);
+ for (j = 0; j < 64; j++)
+ {
+ if (j && !(j % 16))
+ printf ("\n ");
+ printf (" %02x", B[i * 64 + j]);
+ }
+ putchar ('\n');
+ }
+ }
+#endif
+
+ /* X = B[2 * r - 1] */
+ memcpy (X, &B[(2 * r - 1) * 64], 64);
+
+ /* for i = 0 to 2 * r - 1 do */
+ for (i = 0; i <= 2 * r - 1; i++)
+ {
+ /* T = X xor B[i] */
+ buf_xor(X, X, &B[i * 64], 64);
+
+ /* X = Salsa (T) */
+ salsa20_core ((u32*)(void*)X, (u32*)(void*)X, 8);
+
+ /* Y[i] = X */
+ memcpy (&Y[i * 64], X, 64);
+ }
+
+ for (i = 0; i < r; i++)
+ {
+ memcpy (&B[i * 64], &Y[2 * i * 64], 64);
+ memcpy (&B[(r + i) * 64], &Y[(2 * i + 1) * 64], 64);
+ }
+
+#if 0
+ if (r==1)
+ {
+ for (i = 0; i < 2 * r; i++)
+ {
+ size_t j;
+ printf ("B'[%d] =", (int)i);
+ for (j = 0; j < 64; j++)
+ {
+ if (j && !(j % 16))
+ printf ("\n ");
+ printf (" %02x", B[i * 64 + j]);
+ }
+ putchar ('\n');
+ }
+ }
+#endif
+}
+
+
+static void
+scrypt_ro_mix (u32 r, unsigned char *B, u64 N,
+ unsigned char *tmp1, unsigned char *tmp2)
+{
+ unsigned char *X = B, *T = B;
+ u64 i;
+
+#if 0
+ if (r == 1)
+ {
+ printf ("B = ");
+ for (i = 0; i < 128 * r; i++)
+ {
+ if (i && !(i % 16))
+ printf ("\n ");
+ printf (" %02x", B[i]);
+ }
+ putchar ('\n');
+ }
+#endif
+
+ /* for i = 0 to N - 1 do */
+ for (i = 0; i <= N - 1; i++)
+ {
+ /* V[i] = X */
+ memcpy (&tmp1[i * 128 * r], X, 128 * r);
+
+ /* X = ScryptBlockMix (X) */
+ scrypt_block_mix (r, X, tmp2);
+ }
+
+ /* for i = 0 to N - 1 do */
+ for (i = 0; i <= N - 1; i++)
+ {
+ u64 j;
+
+ /* j = Integerify (X) mod N */
+ j = LE_READ_UINT64 (&X[128 * r - 64]) % N;
+
+ /* T = X xor V[j] */
+ buf_xor (T, T, &tmp1[j * 128 * r], 128 * r);
+
+ /* X = scryptBlockMix (T) */
+ scrypt_block_mix (r, T, tmp2);
+ }
+
+#if 0
+ if (r == 1)
+ {
+ printf ("B' =");
+ for (i = 0; i < 128 * r; i++)
+ {
+ if (i && !(i % 16))
+ printf ("\n ");
+ printf (" %02x", B[i]);
+ }
+ putchar ('\n');
+ }
+#endif
+}
+
+
+/*
+ * Derive DKLEN bytes into DK from PASSWD and SALT using scrypt.  SUBALGO
+ * is used as the CPU/memory cost parameter N and ITERATIONS as the
+ * parallelization parameter p; the block size parameter r is fixed below.
+ */
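+/* Rough usage sketch (example values only): an application normally
+ reaches this function through the public KDF API, along the lines of
+
+ unsigned char out[32];
+ gcry_kdf_derive ("passphrase", 10, GCRY_KDF_SCRYPT,
+ 1024, "salt", 4, 1, sizeof out, out);
+
+ i.e. N = 1024 is passed as SUBALGO and p = 1 as ITERATIONS. */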
+gcry_err_code_t
+_gcry_kdf_scrypt (const unsigned char *passwd, size_t passwdlen,
+ int algo, int subalgo,
+ const unsigned char *salt, size_t saltlen,
+ unsigned long iterations,
+ size_t dkLen, unsigned char *DK)
+{
+ u64 N = subalgo; /* CPU/memory cost parameter. */
+ u32 r; /* Block size. */
+ u32 p = iterations; /* Parallelization parameter. */
+
+ gpg_err_code_t ec;
+ u32 i;
+ unsigned char *B = NULL;
+ unsigned char *tmp1 = NULL;
+ unsigned char *tmp2 = NULL;
+ size_t r128;
+ size_t nbytes;
+
+ if (subalgo < 1 || !iterations)
+ return GPG_ERR_INV_VALUE;
+
+ if (algo == GCRY_KDF_SCRYPT)
+ r = 8;
+ else if (algo == 41) /* Hack to allow the use of all test vectors. */
+ r = 1;
+ else
+ return GPG_ERR_UNKNOWN_ALGORITHM;
+
+ r128 = r * 128;
+ if (r128 / 128 != r)
+ return GPG_ERR_ENOMEM;
+
+ nbytes = p * r128;
+ if (r128 && nbytes / r128 != p)
+ return GPG_ERR_ENOMEM;
+
+ nbytes = N * r128;
+ if (r128 && nbytes / r128 != N)
+ return GPG_ERR_ENOMEM;
+
+ nbytes = 64 + r128;
+ if (nbytes < r128)
+ return GPG_ERR_ENOMEM;
+
+ B = xtrymalloc (p * r128);
+ if (!B)
+ {
+ ec = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+
+ tmp1 = xtrymalloc (N * r128);
+ if (!tmp1)
+ {
+ ec = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+
+ tmp2 = xtrymalloc (64 + r128);
+ if (!tmp2)
+ {
+ ec = gpg_err_code_from_syserror ();
+ goto leave;
+ }
+
+ ec = _gcry_kdf_pkdf2 (passwd, passwdlen, GCRY_MD_SHA256, salt, saltlen,
+ 1 /* iterations */, p * r128, B);
+
+ for (i = 0; !ec && i < p; i++)
+ scrypt_ro_mix (r, &B[i * r128], N, tmp1, tmp2);
+
+ for (i = 0; !ec && i < p; i++)
+ ec = _gcry_kdf_pkdf2 (passwd, passwdlen, GCRY_MD_SHA256, B, p * r128,
+ 1 /* iterations */, dkLen, DK);
+
+ leave:
+ xfree (tmp2);
+ xfree (tmp1);
+ xfree (B);
+
+ return ec;
+}
diff --git a/comm/third_party/libgcrypt/cipher/seed.c b/comm/third_party/libgcrypt/cipher/seed.c
new file mode 100644
index 0000000000..2c8958fa82
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/seed.c
@@ -0,0 +1,478 @@
+/* SEED for libgcrypt
+ * Copyright (C) 2006 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * --
+ * This implementation was provided for libgcrypt in the public domain
+ * by Hye-Shik Chang <perky@FreeBSD.org>, July 2006.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "types.h" /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+
+#define NUMKC 16
+
+#define GETU32(pt) buf_get_be32(pt)
+#define PUTU32(ct, st) buf_put_be32(ct, st)
+
+union wordbuf
+{
+ u32 w;
+ byte b[4];
+};
+
+#ifdef WORDS_BIGENDIAN
+#define b0 b[3]
+#define b1 b[2]
+#define b2 b[1]
+#define b3 b[0]
+#else
+#define b0 b[0]
+#define b1 b[1]
+#define b2 b[2]
+#define b3 b[3]
+#endif
+
+static const char *selftest(void);
+
+typedef struct
+{
+ u32 keyschedule[32];
+} SEED_context;
+
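+/* SS0..SS3 are SEED's precomputed "extended S-box" tables: in effect each
+ 32-bit entry combines an S-box output with the corresponding masking
+ constants, so the G function used by the OP macro below reduces to four
+ table lookups XORed together. */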
+static const u32 SS0[256] = {
+ 0x2989a1a8, 0x05858184, 0x16c6d2d4, 0x13c3d3d0, 0x14445054, 0x1d0d111c,
+ 0x2c8ca0ac, 0x25052124, 0x1d4d515c, 0x03434340, 0x18081018, 0x1e0e121c,
+ 0x11415150, 0x3cccf0fc, 0x0acac2c8, 0x23436360, 0x28082028, 0x04444044,
+ 0x20002020, 0x1d8d919c, 0x20c0e0e0, 0x22c2e2e0, 0x08c8c0c8, 0x17071314,
+ 0x2585a1a4, 0x0f8f838c, 0x03030300, 0x3b4b7378, 0x3b8bb3b8, 0x13031310,
+ 0x12c2d2d0, 0x2ecee2ec, 0x30407070, 0x0c8c808c, 0x3f0f333c, 0x2888a0a8,
+ 0x32023230, 0x1dcdd1dc, 0x36c6f2f4, 0x34447074, 0x2ccce0ec, 0x15859194,
+ 0x0b0b0308, 0x17475354, 0x1c4c505c, 0x1b4b5358, 0x3d8db1bc, 0x01010100,
+ 0x24042024, 0x1c0c101c, 0x33437370, 0x18889098, 0x10001010, 0x0cccc0cc,
+ 0x32c2f2f0, 0x19c9d1d8, 0x2c0c202c, 0x27c7e3e4, 0x32427270, 0x03838380,
+ 0x1b8b9398, 0x11c1d1d0, 0x06868284, 0x09c9c1c8, 0x20406060, 0x10405050,
+ 0x2383a3a0, 0x2bcbe3e8, 0x0d0d010c, 0x3686b2b4, 0x1e8e929c, 0x0f4f434c,
+ 0x3787b3b4, 0x1a4a5258, 0x06c6c2c4, 0x38487078, 0x2686a2a4, 0x12021210,
+ 0x2f8fa3ac, 0x15c5d1d4, 0x21416160, 0x03c3c3c0, 0x3484b0b4, 0x01414140,
+ 0x12425250, 0x3d4d717c, 0x0d8d818c, 0x08080008, 0x1f0f131c, 0x19899198,
+ 0x00000000, 0x19091118, 0x04040004, 0x13435350, 0x37c7f3f4, 0x21c1e1e0,
+ 0x3dcdf1fc, 0x36467274, 0x2f0f232c, 0x27072324, 0x3080b0b0, 0x0b8b8388,
+ 0x0e0e020c, 0x2b8ba3a8, 0x2282a2a0, 0x2e4e626c, 0x13839390, 0x0d4d414c,
+ 0x29496168, 0x3c4c707c, 0x09090108, 0x0a0a0208, 0x3f8fb3bc, 0x2fcfe3ec,
+ 0x33c3f3f0, 0x05c5c1c4, 0x07878384, 0x14041014, 0x3ecef2fc, 0x24446064,
+ 0x1eced2dc, 0x2e0e222c, 0x0b4b4348, 0x1a0a1218, 0x06060204, 0x21012120,
+ 0x2b4b6368, 0x26466264, 0x02020200, 0x35c5f1f4, 0x12829290, 0x0a8a8288,
+ 0x0c0c000c, 0x3383b3b0, 0x3e4e727c, 0x10c0d0d0, 0x3a4a7278, 0x07474344,
+ 0x16869294, 0x25c5e1e4, 0x26062224, 0x00808080, 0x2d8da1ac, 0x1fcfd3dc,
+ 0x2181a1a0, 0x30003030, 0x37073334, 0x2e8ea2ac, 0x36063234, 0x15051114,
+ 0x22022220, 0x38083038, 0x34c4f0f4, 0x2787a3a4, 0x05454144, 0x0c4c404c,
+ 0x01818180, 0x29c9e1e8, 0x04848084, 0x17879394, 0x35053134, 0x0bcbc3c8,
+ 0x0ecec2cc, 0x3c0c303c, 0x31417170, 0x11011110, 0x07c7c3c4, 0x09898188,
+ 0x35457174, 0x3bcbf3f8, 0x1acad2d8, 0x38c8f0f8, 0x14849094, 0x19495158,
+ 0x02828280, 0x04c4c0c4, 0x3fcff3fc, 0x09494148, 0x39093138, 0x27476364,
+ 0x00c0c0c0, 0x0fcfc3cc, 0x17c7d3d4, 0x3888b0b8, 0x0f0f030c, 0x0e8e828c,
+ 0x02424240, 0x23032320, 0x11819190, 0x2c4c606c, 0x1bcbd3d8, 0x2484a0a4,
+ 0x34043034, 0x31c1f1f0, 0x08484048, 0x02c2c2c0, 0x2f4f636c, 0x3d0d313c,
+ 0x2d0d212c, 0x00404040, 0x3e8eb2bc, 0x3e0e323c, 0x3c8cb0bc, 0x01c1c1c0,
+ 0x2a8aa2a8, 0x3a8ab2b8, 0x0e4e424c, 0x15455154, 0x3b0b3338, 0x1cccd0dc,
+ 0x28486068, 0x3f4f737c, 0x1c8c909c, 0x18c8d0d8, 0x0a4a4248, 0x16465254,
+ 0x37477374, 0x2080a0a0, 0x2dcde1ec, 0x06464244, 0x3585b1b4, 0x2b0b2328,
+ 0x25456164, 0x3acaf2f8, 0x23c3e3e0, 0x3989b1b8, 0x3181b1b0, 0x1f8f939c,
+ 0x1e4e525c, 0x39c9f1f8, 0x26c6e2e4, 0x3282b2b0, 0x31013130, 0x2acae2e8,
+ 0x2d4d616c, 0x1f4f535c, 0x24c4e0e4, 0x30c0f0f0, 0x0dcdc1cc, 0x08888088,
+ 0x16061214, 0x3a0a3238, 0x18485058, 0x14c4d0d4, 0x22426260, 0x29092128,
+ 0x07070304, 0x33033330, 0x28c8e0e8, 0x1b0b1318, 0x05050104, 0x39497178,
+ 0x10809090, 0x2a4a6268, 0x2a0a2228, 0x1a8a9298,
+};
+
+static const u32 SS1[256] = {
+ 0x38380830, 0xe828c8e0, 0x2c2d0d21, 0xa42686a2, 0xcc0fcfc3, 0xdc1eced2,
+ 0xb03383b3, 0xb83888b0, 0xac2f8fa3, 0x60204060, 0x54154551, 0xc407c7c3,
+ 0x44044440, 0x6c2f4f63, 0x682b4b63, 0x581b4b53, 0xc003c3c3, 0x60224262,
+ 0x30330333, 0xb43585b1, 0x28290921, 0xa02080a0, 0xe022c2e2, 0xa42787a3,
+ 0xd013c3d3, 0x90118191, 0x10110111, 0x04060602, 0x1c1c0c10, 0xbc3c8cb0,
+ 0x34360632, 0x480b4b43, 0xec2fcfe3, 0x88088880, 0x6c2c4c60, 0xa82888a0,
+ 0x14170713, 0xc404c4c0, 0x14160612, 0xf434c4f0, 0xc002c2c2, 0x44054541,
+ 0xe021c1e1, 0xd416c6d2, 0x3c3f0f33, 0x3c3d0d31, 0x8c0e8e82, 0x98188890,
+ 0x28280820, 0x4c0e4e42, 0xf436c6f2, 0x3c3e0e32, 0xa42585a1, 0xf839c9f1,
+ 0x0c0d0d01, 0xdc1fcfd3, 0xd818c8d0, 0x282b0b23, 0x64264662, 0x783a4a72,
+ 0x24270723, 0x2c2f0f23, 0xf031c1f1, 0x70324272, 0x40024242, 0xd414c4d0,
+ 0x40014141, 0xc000c0c0, 0x70334373, 0x64274763, 0xac2c8ca0, 0x880b8b83,
+ 0xf437c7f3, 0xac2d8da1, 0x80008080, 0x1c1f0f13, 0xc80acac2, 0x2c2c0c20,
+ 0xa82a8aa2, 0x34340430, 0xd012c2d2, 0x080b0b03, 0xec2ecee2, 0xe829c9e1,
+ 0x5c1d4d51, 0x94148490, 0x18180810, 0xf838c8f0, 0x54174753, 0xac2e8ea2,
+ 0x08080800, 0xc405c5c1, 0x10130313, 0xcc0dcdc1, 0x84068682, 0xb83989b1,
+ 0xfc3fcff3, 0x7c3d4d71, 0xc001c1c1, 0x30310131, 0xf435c5f1, 0x880a8a82,
+ 0x682a4a62, 0xb03181b1, 0xd011c1d1, 0x20200020, 0xd417c7d3, 0x00020202,
+ 0x20220222, 0x04040400, 0x68284860, 0x70314171, 0x04070703, 0xd81bcbd3,
+ 0x9c1d8d91, 0x98198991, 0x60214161, 0xbc3e8eb2, 0xe426c6e2, 0x58194951,
+ 0xdc1dcdd1, 0x50114151, 0x90108090, 0xdc1cccd0, 0x981a8a92, 0xa02383a3,
+ 0xa82b8ba3, 0xd010c0d0, 0x80018181, 0x0c0f0f03, 0x44074743, 0x181a0a12,
+ 0xe023c3e3, 0xec2ccce0, 0x8c0d8d81, 0xbc3f8fb3, 0x94168692, 0x783b4b73,
+ 0x5c1c4c50, 0xa02282a2, 0xa02181a1, 0x60234363, 0x20230323, 0x4c0d4d41,
+ 0xc808c8c0, 0x9c1e8e92, 0x9c1c8c90, 0x383a0a32, 0x0c0c0c00, 0x2c2e0e22,
+ 0xb83a8ab2, 0x6c2e4e62, 0x9c1f8f93, 0x581a4a52, 0xf032c2f2, 0x90128292,
+ 0xf033c3f3, 0x48094941, 0x78384870, 0xcc0cccc0, 0x14150511, 0xf83bcbf3,
+ 0x70304070, 0x74354571, 0x7c3f4f73, 0x34350531, 0x10100010, 0x00030303,
+ 0x64244460, 0x6c2d4d61, 0xc406c6c2, 0x74344470, 0xd415c5d1, 0xb43484b0,
+ 0xe82acae2, 0x08090901, 0x74364672, 0x18190911, 0xfc3ecef2, 0x40004040,
+ 0x10120212, 0xe020c0e0, 0xbc3d8db1, 0x04050501, 0xf83acaf2, 0x00010101,
+ 0xf030c0f0, 0x282a0a22, 0x5c1e4e52, 0xa82989a1, 0x54164652, 0x40034343,
+ 0x84058581, 0x14140410, 0x88098981, 0x981b8b93, 0xb03080b0, 0xe425c5e1,
+ 0x48084840, 0x78394971, 0x94178793, 0xfc3cccf0, 0x1c1e0e12, 0x80028282,
+ 0x20210121, 0x8c0c8c80, 0x181b0b13, 0x5c1f4f53, 0x74374773, 0x54144450,
+ 0xb03282b2, 0x1c1d0d11, 0x24250521, 0x4c0f4f43, 0x00000000, 0x44064642,
+ 0xec2dcde1, 0x58184850, 0x50124252, 0xe82bcbe3, 0x7c3e4e72, 0xd81acad2,
+ 0xc809c9c1, 0xfc3dcdf1, 0x30300030, 0x94158591, 0x64254561, 0x3c3c0c30,
+ 0xb43686b2, 0xe424c4e0, 0xb83b8bb3, 0x7c3c4c70, 0x0c0e0e02, 0x50104050,
+ 0x38390931, 0x24260622, 0x30320232, 0x84048480, 0x68294961, 0x90138393,
+ 0x34370733, 0xe427c7e3, 0x24240420, 0xa42484a0, 0xc80bcbc3, 0x50134353,
+ 0x080a0a02, 0x84078783, 0xd819c9d1, 0x4c0c4c40, 0x80038383, 0x8c0f8f83,
+ 0xcc0ecec2, 0x383b0b33, 0x480a4a42, 0xb43787b3,
+};
+
+static const u32 SS2[256] = {
+ 0xa1a82989, 0x81840585, 0xd2d416c6, 0xd3d013c3, 0x50541444, 0x111c1d0d,
+ 0xa0ac2c8c, 0x21242505, 0x515c1d4d, 0x43400343, 0x10181808, 0x121c1e0e,
+ 0x51501141, 0xf0fc3ccc, 0xc2c80aca, 0x63602343, 0x20282808, 0x40440444,
+ 0x20202000, 0x919c1d8d, 0xe0e020c0, 0xe2e022c2, 0xc0c808c8, 0x13141707,
+ 0xa1a42585, 0x838c0f8f, 0x03000303, 0x73783b4b, 0xb3b83b8b, 0x13101303,
+ 0xd2d012c2, 0xe2ec2ece, 0x70703040, 0x808c0c8c, 0x333c3f0f, 0xa0a82888,
+ 0x32303202, 0xd1dc1dcd, 0xf2f436c6, 0x70743444, 0xe0ec2ccc, 0x91941585,
+ 0x03080b0b, 0x53541747, 0x505c1c4c, 0x53581b4b, 0xb1bc3d8d, 0x01000101,
+ 0x20242404, 0x101c1c0c, 0x73703343, 0x90981888, 0x10101000, 0xc0cc0ccc,
+ 0xf2f032c2, 0xd1d819c9, 0x202c2c0c, 0xe3e427c7, 0x72703242, 0x83800383,
+ 0x93981b8b, 0xd1d011c1, 0x82840686, 0xc1c809c9, 0x60602040, 0x50501040,
+ 0xa3a02383, 0xe3e82bcb, 0x010c0d0d, 0xb2b43686, 0x929c1e8e, 0x434c0f4f,
+ 0xb3b43787, 0x52581a4a, 0xc2c406c6, 0x70783848, 0xa2a42686, 0x12101202,
+ 0xa3ac2f8f, 0xd1d415c5, 0x61602141, 0xc3c003c3, 0xb0b43484, 0x41400141,
+ 0x52501242, 0x717c3d4d, 0x818c0d8d, 0x00080808, 0x131c1f0f, 0x91981989,
+ 0x00000000, 0x11181909, 0x00040404, 0x53501343, 0xf3f437c7, 0xe1e021c1,
+ 0xf1fc3dcd, 0x72743646, 0x232c2f0f, 0x23242707, 0xb0b03080, 0x83880b8b,
+ 0x020c0e0e, 0xa3a82b8b, 0xa2a02282, 0x626c2e4e, 0x93901383, 0x414c0d4d,
+ 0x61682949, 0x707c3c4c, 0x01080909, 0x02080a0a, 0xb3bc3f8f, 0xe3ec2fcf,
+ 0xf3f033c3, 0xc1c405c5, 0x83840787, 0x10141404, 0xf2fc3ece, 0x60642444,
+ 0xd2dc1ece, 0x222c2e0e, 0x43480b4b, 0x12181a0a, 0x02040606, 0x21202101,
+ 0x63682b4b, 0x62642646, 0x02000202, 0xf1f435c5, 0x92901282, 0x82880a8a,
+ 0x000c0c0c, 0xb3b03383, 0x727c3e4e, 0xd0d010c0, 0x72783a4a, 0x43440747,
+ 0x92941686, 0xe1e425c5, 0x22242606, 0x80800080, 0xa1ac2d8d, 0xd3dc1fcf,
+ 0xa1a02181, 0x30303000, 0x33343707, 0xa2ac2e8e, 0x32343606, 0x11141505,
+ 0x22202202, 0x30383808, 0xf0f434c4, 0xa3a42787, 0x41440545, 0x404c0c4c,
+ 0x81800181, 0xe1e829c9, 0x80840484, 0x93941787, 0x31343505, 0xc3c80bcb,
+ 0xc2cc0ece, 0x303c3c0c, 0x71703141, 0x11101101, 0xc3c407c7, 0x81880989,
+ 0x71743545, 0xf3f83bcb, 0xd2d81aca, 0xf0f838c8, 0x90941484, 0x51581949,
+ 0x82800282, 0xc0c404c4, 0xf3fc3fcf, 0x41480949, 0x31383909, 0x63642747,
+ 0xc0c000c0, 0xc3cc0fcf, 0xd3d417c7, 0xb0b83888, 0x030c0f0f, 0x828c0e8e,
+ 0x42400242, 0x23202303, 0x91901181, 0x606c2c4c, 0xd3d81bcb, 0xa0a42484,
+ 0x30343404, 0xf1f031c1, 0x40480848, 0xc2c002c2, 0x636c2f4f, 0x313c3d0d,
+ 0x212c2d0d, 0x40400040, 0xb2bc3e8e, 0x323c3e0e, 0xb0bc3c8c, 0xc1c001c1,
+ 0xa2a82a8a, 0xb2b83a8a, 0x424c0e4e, 0x51541545, 0x33383b0b, 0xd0dc1ccc,
+ 0x60682848, 0x737c3f4f, 0x909c1c8c, 0xd0d818c8, 0x42480a4a, 0x52541646,
+ 0x73743747, 0xa0a02080, 0xe1ec2dcd, 0x42440646, 0xb1b43585, 0x23282b0b,
+ 0x61642545, 0xf2f83aca, 0xe3e023c3, 0xb1b83989, 0xb1b03181, 0x939c1f8f,
+ 0x525c1e4e, 0xf1f839c9, 0xe2e426c6, 0xb2b03282, 0x31303101, 0xe2e82aca,
+ 0x616c2d4d, 0x535c1f4f, 0xe0e424c4, 0xf0f030c0, 0xc1cc0dcd, 0x80880888,
+ 0x12141606, 0x32383a0a, 0x50581848, 0xd0d414c4, 0x62602242, 0x21282909,
+ 0x03040707, 0x33303303, 0xe0e828c8, 0x13181b0b, 0x01040505, 0x71783949,
+ 0x90901080, 0x62682a4a, 0x22282a0a, 0x92981a8a,
+};
+
+static const u32 SS3[256] = {
+ 0x08303838, 0xc8e0e828, 0x0d212c2d, 0x86a2a426, 0xcfc3cc0f, 0xced2dc1e,
+ 0x83b3b033, 0x88b0b838, 0x8fa3ac2f, 0x40606020, 0x45515415, 0xc7c3c407,
+ 0x44404404, 0x4f636c2f, 0x4b63682b, 0x4b53581b, 0xc3c3c003, 0x42626022,
+ 0x03333033, 0x85b1b435, 0x09212829, 0x80a0a020, 0xc2e2e022, 0x87a3a427,
+ 0xc3d3d013, 0x81919011, 0x01111011, 0x06020406, 0x0c101c1c, 0x8cb0bc3c,
+ 0x06323436, 0x4b43480b, 0xcfe3ec2f, 0x88808808, 0x4c606c2c, 0x88a0a828,
+ 0x07131417, 0xc4c0c404, 0x06121416, 0xc4f0f434, 0xc2c2c002, 0x45414405,
+ 0xc1e1e021, 0xc6d2d416, 0x0f333c3f, 0x0d313c3d, 0x8e828c0e, 0x88909818,
+ 0x08202828, 0x4e424c0e, 0xc6f2f436, 0x0e323c3e, 0x85a1a425, 0xc9f1f839,
+ 0x0d010c0d, 0xcfd3dc1f, 0xc8d0d818, 0x0b23282b, 0x46626426, 0x4a72783a,
+ 0x07232427, 0x0f232c2f, 0xc1f1f031, 0x42727032, 0x42424002, 0xc4d0d414,
+ 0x41414001, 0xc0c0c000, 0x43737033, 0x47636427, 0x8ca0ac2c, 0x8b83880b,
+ 0xc7f3f437, 0x8da1ac2d, 0x80808000, 0x0f131c1f, 0xcac2c80a, 0x0c202c2c,
+ 0x8aa2a82a, 0x04303434, 0xc2d2d012, 0x0b03080b, 0xcee2ec2e, 0xc9e1e829,
+ 0x4d515c1d, 0x84909414, 0x08101818, 0xc8f0f838, 0x47535417, 0x8ea2ac2e,
+ 0x08000808, 0xc5c1c405, 0x03131013, 0xcdc1cc0d, 0x86828406, 0x89b1b839,
+ 0xcff3fc3f, 0x4d717c3d, 0xc1c1c001, 0x01313031, 0xc5f1f435, 0x8a82880a,
+ 0x4a62682a, 0x81b1b031, 0xc1d1d011, 0x00202020, 0xc7d3d417, 0x02020002,
+ 0x02222022, 0x04000404, 0x48606828, 0x41717031, 0x07030407, 0xcbd3d81b,
+ 0x8d919c1d, 0x89919819, 0x41616021, 0x8eb2bc3e, 0xc6e2e426, 0x49515819,
+ 0xcdd1dc1d, 0x41515011, 0x80909010, 0xccd0dc1c, 0x8a92981a, 0x83a3a023,
+ 0x8ba3a82b, 0xc0d0d010, 0x81818001, 0x0f030c0f, 0x47434407, 0x0a12181a,
+ 0xc3e3e023, 0xcce0ec2c, 0x8d818c0d, 0x8fb3bc3f, 0x86929416, 0x4b73783b,
+ 0x4c505c1c, 0x82a2a022, 0x81a1a021, 0x43636023, 0x03232023, 0x4d414c0d,
+ 0xc8c0c808, 0x8e929c1e, 0x8c909c1c, 0x0a32383a, 0x0c000c0c, 0x0e222c2e,
+ 0x8ab2b83a, 0x4e626c2e, 0x8f939c1f, 0x4a52581a, 0xc2f2f032, 0x82929012,
+ 0xc3f3f033, 0x49414809, 0x48707838, 0xccc0cc0c, 0x05111415, 0xcbf3f83b,
+ 0x40707030, 0x45717435, 0x4f737c3f, 0x05313435, 0x00101010, 0x03030003,
+ 0x44606424, 0x4d616c2d, 0xc6c2c406, 0x44707434, 0xc5d1d415, 0x84b0b434,
+ 0xcae2e82a, 0x09010809, 0x46727436, 0x09111819, 0xcef2fc3e, 0x40404000,
+ 0x02121012, 0xc0e0e020, 0x8db1bc3d, 0x05010405, 0xcaf2f83a, 0x01010001,
+ 0xc0f0f030, 0x0a22282a, 0x4e525c1e, 0x89a1a829, 0x46525416, 0x43434003,
+ 0x85818405, 0x04101414, 0x89818809, 0x8b93981b, 0x80b0b030, 0xc5e1e425,
+ 0x48404808, 0x49717839, 0x87939417, 0xccf0fc3c, 0x0e121c1e, 0x82828002,
+ 0x01212021, 0x8c808c0c, 0x0b13181b, 0x4f535c1f, 0x47737437, 0x44505414,
+ 0x82b2b032, 0x0d111c1d, 0x05212425, 0x4f434c0f, 0x00000000, 0x46424406,
+ 0xcde1ec2d, 0x48505818, 0x42525012, 0xcbe3e82b, 0x4e727c3e, 0xcad2d81a,
+ 0xc9c1c809, 0xcdf1fc3d, 0x00303030, 0x85919415, 0x45616425, 0x0c303c3c,
+ 0x86b2b436, 0xc4e0e424, 0x8bb3b83b, 0x4c707c3c, 0x0e020c0e, 0x40505010,
+ 0x09313839, 0x06222426, 0x02323032, 0x84808404, 0x49616829, 0x83939013,
+ 0x07333437, 0xc7e3e427, 0x04202424, 0x84a0a424, 0xcbc3c80b, 0x43535013,
+ 0x0a02080a, 0x87838407, 0xc9d1d819, 0x4c404c0c, 0x83838003, 0x8f838c0f,
+ 0xcec2cc0e, 0x0b33383b, 0x4a42480a, 0x87b3b437,
+};
+
+static const u32 KC[NUMKC] = {
+ 0x9e3779b9, 0x3c6ef373, 0x78dde6e6, 0xf1bbcdcc,
+ 0xe3779b99, 0xc6ef3733, 0x8dde6e67, 0x1bbcdccf,
+ 0x3779b99e, 0x6ef3733c, 0xdde6e678, 0xbbcdccf1,
+ 0x779b99e3, 0xef3733c6, 0xde6e678d, 0xbcdccf1b,
+};
+
+
+
+/* Perform the key setup.
+ */
+static gcry_err_code_t
+do_setkey (SEED_context *ctx, const byte *key, const unsigned keylen)
+{
+ static int initialized = 0;
+ static const char *selftest_failed=0;
+ u32 x1, x2, x3, x4;
+ union wordbuf t0, t1;
+ u32 *keyout = ctx->keyschedule;
+ int i;
+
+ if (!initialized)
+ {
+ initialized = 1;
+ selftest_failed = selftest ();
+ if( selftest_failed )
+ log_error ("%s\n", selftest_failed );
+ }
+ if (selftest_failed)
+ return GPG_ERR_SELFTEST_FAILED;
+
+ if (keylen != 16)
+ return GPG_ERR_INV_KEYLEN;
+
+ x1 = GETU32 (key);
+ x2 = GETU32 (key+4);
+ x3 = GETU32 (key+8);
+ x4 = GETU32 (key+12);
+
+ for (i = 0; i < NUMKC; i++)
+ {
+ t0.w = x1 + x3 - KC[i];
+ t1.w = x2 + KC[i] - x4;
+ *(keyout++) = SS0[t0.b0] ^ SS1[t0.b1] ^ SS2[t0.b2] ^ SS3[t0.b3];
+ *(keyout++) = SS0[t1.b0] ^ SS1[t1.b1] ^ SS2[t1.b2] ^ SS3[t1.b3];
+
+ if (i % 2 == 0)
+ {
+ t0.w = x1;
+ x1 = (x1>>8) ^ (x2<<24);
+ x2 = (x2>>8) ^ (t0.w<<24);
+ }
+ else
+ {
+ t0.w = x3;
+ x3 = (x3<<8) ^ (x4>>24);
+ x4 = (x4<<8) ^ (t0.w>>24);
+ }
+ }
+
+ return 0;
+}
+
+static gcry_err_code_t
+seed_setkey (void *context, const byte *key, const unsigned keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ SEED_context *ctx = context;
+ int rc = do_setkey (ctx, key, keylen);
+ (void)bulk_ops;
+ _gcry_burn_stack (4*6 + sizeof(void*)*2 + sizeof(int)*2);
+ return rc;
+}
+
+
+
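+/* One Feistel round of SEED: the two key schedule words at RBASE are
+ mixed into (X3, X4), the G function (the SS0..SS3 lookups) is applied
+ three times with 32-bit additions in between, and the result is XORed
+ into (X1, X2). */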
+#define OP(X1, X2, X3, X4, rbase) \
+ t0.w = X3 ^ ctx->keyschedule[rbase]; \
+ t1.w = X4 ^ ctx->keyschedule[rbase+1]; \
+ t1.w ^= t0.w; \
+ t1.w = SS0[t1.b0] ^ SS1[t1.b1] ^ SS2[t1.b2] ^ SS3[t1.b3]; \
+ t0.w += t1.w; \
+ t0.w = SS0[t0.b0] ^ SS1[t0.b1] ^ SS2[t0.b2] ^ SS3[t0.b3]; \
+ t1.w += t0.w; \
+ t1.w = SS0[t1.b0] ^ SS1[t1.b1] ^ SS2[t1.b2] ^ SS3[t1.b3]; \
+ t0.w += t1.w; \
+ X1 ^= t0.w; \
+ X2 ^= t1.w;
+
+/* Encrypt one block. inbuf and outbuf may be the same. */
+static void
+do_encrypt (const SEED_context *ctx, byte *outbuf, const byte *inbuf)
+{
+ u32 x1, x2, x3, x4;
+ union wordbuf t0, t1;
+
+ x1 = GETU32 (inbuf);
+ x2 = GETU32 (inbuf+4);
+ x3 = GETU32 (inbuf+8);
+ x4 = GETU32 (inbuf+12);
+
+ OP (x1, x2, x3, x4, 0);
+ OP (x3, x4, x1, x2, 2);
+ OP (x1, x2, x3, x4, 4);
+ OP (x3, x4, x1, x2, 6);
+ OP (x1, x2, x3, x4, 8);
+ OP (x3, x4, x1, x2, 10);
+ OP (x1, x2, x3, x4, 12);
+ OP (x3, x4, x1, x2, 14);
+ OP (x1, x2, x3, x4, 16);
+ OP (x3, x4, x1, x2, 18);
+ OP (x1, x2, x3, x4, 20);
+ OP (x3, x4, x1, x2, 22);
+ OP (x1, x2, x3, x4, 24);
+ OP (x3, x4, x1, x2, 26);
+ OP (x1, x2, x3, x4, 28);
+ OP (x3, x4, x1, x2, 30);
+
+ PUTU32 (outbuf, x3);
+ PUTU32 (outbuf+4, x4);
+ PUTU32 (outbuf+8, x1);
+ PUTU32 (outbuf+12, x2);
+}
+
+static unsigned int
+seed_encrypt (void *context, byte *outbuf, const byte *inbuf)
+{
+ SEED_context *ctx = context;
+
+ do_encrypt (ctx, outbuf, inbuf);
+ return /*burn_stack*/ (4*6);
+}
+
+
+
+/* Decrypt one block. inbuf and outbuf may be the same. */
+static void
+do_decrypt (SEED_context *ctx, byte *outbuf, const byte *inbuf)
+{
+ u32 x1, x2, x3, x4;
+ union wordbuf t0, t1;
+
+ x1 = GETU32 (inbuf);
+ x2 = GETU32 (inbuf+4);
+ x3 = GETU32 (inbuf+8);
+ x4 = GETU32 (inbuf+12);
+
+ OP (x1, x2, x3, x4, 30);
+ OP (x3, x4, x1, x2, 28);
+ OP (x1, x2, x3, x4, 26);
+ OP (x3, x4, x1, x2, 24);
+ OP (x1, x2, x3, x4, 22);
+ OP (x3, x4, x1, x2, 20);
+ OP (x1, x2, x3, x4, 18);
+ OP (x3, x4, x1, x2, 16);
+ OP (x1, x2, x3, x4, 14);
+ OP (x3, x4, x1, x2, 12);
+ OP (x1, x2, x3, x4, 10);
+ OP (x3, x4, x1, x2, 8);
+ OP (x1, x2, x3, x4, 6);
+ OP (x3, x4, x1, x2, 4);
+ OP (x1, x2, x3, x4, 2);
+ OP (x3, x4, x1, x2, 0);
+
+ PUTU32 (outbuf, x3);
+ PUTU32 (outbuf+4, x4);
+ PUTU32 (outbuf+8, x1);
+ PUTU32 (outbuf+12, x2);
+}
+
+static unsigned int
+seed_decrypt (void *context, byte *outbuf, const byte *inbuf)
+{
+ SEED_context *ctx = context;
+
+ do_decrypt (ctx, outbuf, inbuf);
+ return /*burn_stack*/ (4*6);
+}
+
+
+/* Test a single encryption and decryption with each key size. */
+static const char*
+selftest (void)
+{
+ SEED_context ctx;
+ byte scratch[16];
+
+ /* The test vector is taken from Appendix B.3 of RFC 4269.
+ */
+ static const byte plaintext[16] = {
+ 0x83, 0xA2, 0xF8, 0xA2, 0x88, 0x64, 0x1F, 0xB9,
+ 0xA4, 0xE9, 0xA5, 0xCC, 0x2F, 0x13, 0x1C, 0x7D
+ };
+ static const byte key[16] = {
+ 0x47, 0x06, 0x48, 0x08, 0x51, 0xE6, 0x1B, 0xE8,
+ 0x5D, 0x74, 0xBF, 0xB3, 0xFD, 0x95, 0x61, 0x85
+ };
+ static const byte ciphertext[16] = {
+ 0xEE, 0x54, 0xD1, 0x3E, 0xBC, 0xAE, 0x70, 0x6D,
+ 0x22, 0x6B, 0xC3, 0x14, 0x2C, 0xD4, 0x0D, 0x4A,
+ };
+
+ seed_setkey (&ctx, key, sizeof(key), NULL);
+ seed_encrypt (&ctx, scratch, plaintext);
+ if (memcmp (scratch, ciphertext, sizeof (ciphertext)))
+ return "SEED test encryption failed.";
+ seed_decrypt (&ctx, scratch, scratch);
+ if (memcmp (scratch, plaintext, sizeof (plaintext)))
+ return "SEED test decryption failed.";
+
+ return NULL;
+}
+
+
+
+static gcry_cipher_oid_spec_t seed_oids[] =
+ {
+ { "1.2.410.200004.1.3", GCRY_CIPHER_MODE_ECB },
+ { "1.2.410.200004.1.4", GCRY_CIPHER_MODE_CBC },
+ { "1.2.410.200004.1.5", GCRY_CIPHER_MODE_CFB },
+ { "1.2.410.200004.1.6", GCRY_CIPHER_MODE_OFB },
+ { NULL }
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_seed =
+ {
+ GCRY_CIPHER_SEED, {0, 0},
+ "SEED", NULL, seed_oids, 16, 128, sizeof (SEED_context),
+ seed_setkey, seed_encrypt, seed_decrypt,
+ };
diff --git a/comm/third_party/libgcrypt/cipher/serpent-armv7-neon.S b/comm/third_party/libgcrypt/cipher/serpent-armv7-neon.S
new file mode 100644
index 0000000000..adff639463
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/serpent-armv7-neon.S
@@ -0,0 +1,1124 @@
+/* serpent-armv7-neon.S - ARM/NEON assembly implementation of Serpent cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_NEON)
+
+.text
+
+.syntax unified
+.fpu neon
+.arm
+
+/* ARM registers */
+#define RROUND r0
+
+/* NEON vector registers */
+#define RA0 q0
+#define RA1 q1
+#define RA2 q2
+#define RA3 q3
+#define RA4 q4
+#define RB0 q5
+#define RB1 q6
+#define RB2 q7
+#define RB3 q8
+#define RB4 q9
+
+#define RT0 q10
+#define RT1 q11
+#define RT2 q12
+#define RT3 q13
+
+#define RA0d0 d0
+#define RA0d1 d1
+#define RA1d0 d2
+#define RA1d1 d3
+#define RA2d0 d4
+#define RA2d1 d5
+#define RA3d0 d6
+#define RA3d1 d7
+#define RA4d0 d8
+#define RA4d1 d9
+#define RB0d0 d10
+#define RB0d1 d11
+#define RB1d0 d12
+#define RB1d1 d13
+#define RB2d0 d14
+#define RB2d1 d15
+#define RB3d0 d16
+#define RB3d1 d17
+#define RB4d0 d18
+#define RB4d1 d19
+#define RT0d0 d20
+#define RT0d1 d21
+#define RT1d0 d22
+#define RT1d1 d23
+#define RT2d0 d24
+#define RT2d1 d25
+
+/**********************************************************************
+ helper macros
+ **********************************************************************/
+
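+/* Transpose a 4x4 matrix of 32-bit words held across four q registers:
+ the vtrn.32 instructions transpose each 2x2 sub-block of lanes and the
+ vswp of the d halves then exchanges the off-diagonal sub-blocks. */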
+#define transpose_4x4(_q0, _q1, _q2, _q3) \
+ vtrn.32 _q0, _q1; \
+ vtrn.32 _q2, _q3; \
+ vswp _q0##d1, _q2##d0; \
+ vswp _q1##d1, _q3##d0;
+
+/**********************************************************************
+ 8-way serpent
+ **********************************************************************/
+
+/*
+ * These are the S-boxes of Serpent, taken from the following research paper.
+ *
+ * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
+ * (New York, New York, USA), pp. 317–329, National Institute of Standards and
+ * Technology, 2000.
+ *
+ * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
+ *
+ */
+#define SBOX0(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ veor a3, a3, a0; veor b3, b3, b0; vmov a4, a1; vmov b4, b1; \
+ vand a1, a1, a3; vand b1, b1, b3; veor a4, a4, a2; veor b4, b4, b2; \
+ veor a1, a1, a0; veor b1, b1, b0; vorr a0, a0, a3; vorr b0, b0, b3; \
+ veor a0, a0, a4; veor b0, b0, b4; veor a4, a4, a3; veor b4, b4, b3; \
+ veor a3, a3, a2; veor b3, b3, b2; vorr a2, a2, a1; vorr b2, b2, b1; \
+ veor a2, a2, a4; veor b2, b2, b4; vmvn a4, a4; vmvn b4, b4; \
+ vorr a4, a4, a1; vorr b4, b4, b1; veor a1, a1, a3; veor b1, b1, b3; \
+ veor a1, a1, a4; veor b1, b1, b4; vorr a3, a3, a0; vorr b3, b3, b0; \
+ veor a1, a1, a3; veor b1, b1, b3; veor a4, a3; veor b4, b3;
+
+#define SBOX0_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vmvn a2, a2; vmvn b2, b2; vmov a4, a1; vmov b4, b1; \
+ vorr a1, a1, a0; vorr b1, b1, b0; vmvn a4, a4; vmvn b4, b4; \
+ veor a1, a1, a2; veor b1, b1, b2; vorr a2, a2, a4; vorr b2, b2, b4; \
+ veor a1, a1, a3; veor b1, b1, b3; veor a0, a0, a4; veor b0, b0, b4; \
+ veor a2, a2, a0; veor b2, b2, b0; vand a0, a0, a3; vand b0, b0, b3; \
+ veor a4, a4, a0; veor b4, b4, b0; vorr a0, a0, a1; vorr b0, b0, b1; \
+ veor a0, a0, a2; veor b0, b0, b2; veor a3, a3, a4; veor b3, b3, b4; \
+ veor a2, a2, a1; veor b2, b2, b1; veor a3, a3, a0; veor b3, b3, b0; \
+ veor a3, a3, a1; veor b3, b3, b1;\
+ vand a2, a2, a3; vand b2, b2, b3;\
+ veor a4, a2; veor b4, b2;
+
+#define SBOX1(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vmvn a0, a0; vmvn b0, b0; vmvn a2, a2; vmvn b2, b2; \
+ vmov a4, a0; vmov b4, b0; vand a0, a0, a1; vand b0, b0, b1; \
+ veor a2, a2, a0; veor b2, b2, b0; vorr a0, a0, a3; vorr b0, b0, b3; \
+ veor a3, a3, a2; veor b3, b3, b2; veor a1, a1, a0; veor b1, b1, b0; \
+ veor a0, a0, a4; veor b0, b0, b4; vorr a4, a4, a1; vorr b4, b4, b1; \
+ veor a1, a1, a3; veor b1, b1, b3; vorr a2, a2, a0; vorr b2, b2, b0; \
+ vand a2, a2, a4; vand b2, b2, b4; veor a0, a0, a1; veor b0, b0, b1; \
+ vand a1, a1, a2; vand b1, b1, b2;\
+ veor a1, a1, a0; veor b1, b1, b0; vand a0, a0, a2; vand b0, b0, b2; \
+ veor a0, a4; veor b0, b4;
+
+#define SBOX1_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vmov a4, a1; vmov b4, b1; veor a1, a1, a3; veor b1, b1, b3; \
+ vand a3, a3, a1; vand b3, b3, b1; veor a4, a4, a2; veor b4, b4, b2; \
+ veor a3, a3, a0; veor b3, b3, b0; vorr a0, a0, a1; vorr b0, b0, b1; \
+ veor a2, a2, a3; veor b2, b2, b3; veor a0, a0, a4; veor b0, b0, b4; \
+ vorr a0, a0, a2; vorr b0, b0, b2; veor a1, a1, a3; veor b1, b1, b3; \
+ veor a0, a0, a1; veor b0, b0, b1; vorr a1, a1, a3; vorr b1, b1, b3; \
+ veor a1, a1, a0; veor b1, b1, b0; vmvn a4, a4; vmvn b4, b4; \
+ veor a4, a4, a1; veor b4, b4, b1; vorr a1, a1, a0; vorr b1, b1, b0; \
+ veor a1, a1, a0; veor b1, b1, b0;\
+ vorr a1, a1, a4; vorr b1, b1, b4;\
+ veor a3, a1; veor b3, b1;
+
+#define SBOX2(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vmov a4, a0; vmov b4, b0; vand a0, a0, a2; vand b0, b0, b2; \
+ veor a0, a0, a3; veor b0, b0, b3; veor a2, a2, a1; veor b2, b2, b1; \
+ veor a2, a2, a0; veor b2, b2, b0; vorr a3, a3, a4; vorr b3, b3, b4; \
+ veor a3, a3, a1; veor b3, b3, b1; veor a4, a4, a2; veor b4, b4, b2; \
+ vmov a1, a3; vmov b1, b3; vorr a3, a3, a4; vorr b3, b3, b4; \
+ veor a3, a3, a0; veor b3, b3, b0; vand a0, a0, a1; vand b0, b0, b1; \
+ veor a4, a4, a0; veor b4, b4, b0; veor a1, a1, a3; veor b1, b1, b3; \
+ veor a1, a1, a4; veor b1, b1, b4; vmvn a4, a4; vmvn b4, b4;
+
+#define SBOX2_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ veor a2, a2, a3; veor b2, b2, b3; veor a3, a3, a0; veor b3, b3, b0; \
+ vmov a4, a3; vmov b4, b3; vand a3, a3, a2; vand b3, b3, b2; \
+ veor a3, a3, a1; veor b3, b3, b1; vorr a1, a1, a2; vorr b1, b1, b2; \
+ veor a1, a1, a4; veor b1, b1, b4; vand a4, a4, a3; vand b4, b4, b3; \
+ veor a2, a2, a3; veor b2, b2, b3; vand a4, a4, a0; vand b4, b4, b0; \
+ veor a4, a4, a2; veor b4, b4, b2; vand a2, a2, a1; vand b2, b2, b1; \
+ vorr a2, a2, a0; vorr b2, b2, b0; vmvn a3, a3; vmvn b3, b3; \
+ veor a2, a2, a3; veor b2, b2, b3; veor a0, a0, a3; veor b0, b0, b3; \
+ vand a0, a0, a1; vand b0, b0, b1; veor a3, a3, a4; veor b3, b3, b4; \
+ veor a3, a0; veor b3, b0;
+
+#define SBOX3(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vmov a4, a0; vmov b4, b0; vorr a0, a0, a3; vorr b0, b0, b3; \
+ veor a3, a3, a1; veor b3, b3, b1; vand a1, a1, a4; vand b1, b1, b4; \
+ veor a4, a4, a2; veor b4, b4, b2; veor a2, a2, a3; veor b2, b2, b3; \
+ vand a3, a3, a0; vand b3, b3, b0; vorr a4, a4, a1; vorr b4, b4, b1; \
+ veor a3, a3, a4; veor b3, b3, b4; veor a0, a0, a1; veor b0, b0, b1; \
+ vand a4, a4, a0; vand b4, b4, b0; veor a1, a1, a3; veor b1, b1, b3; \
+ veor a4, a4, a2; veor b4, b4, b2; vorr a1, a1, a0; vorr b1, b1, b0; \
+ veor a1, a1, a2; veor b1, b1, b2; veor a0, a0, a3; veor b0, b0, b3; \
+ vmov a2, a1; vmov b2, b1; vorr a1, a1, a3; vorr b1, b1, b3; \
+ veor a1, a0; veor b1, b0;
+
+#define SBOX3_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vmov a4, a2; vmov b4, b2; veor a2, a2, a1; veor b2, b2, b1; \
+ veor a0, a0, a2; veor b0, b0, b2; vand a4, a4, a2; vand b4, b4, b2; \
+ veor a4, a4, a0; veor b4, b4, b0; vand a0, a0, a1; vand b0, b0, b1; \
+ veor a1, a1, a3; veor b1, b1, b3; vorr a3, a3, a4; vorr b3, b3, b4; \
+ veor a2, a2, a3; veor b2, b2, b3; veor a0, a0, a3; veor b0, b0, b3; \
+ veor a1, a1, a4; veor b1, b1, b4; vand a3, a3, a2; vand b3, b3, b2; \
+ veor a3, a3, a1; veor b3, b3, b1; veor a1, a1, a0; veor b1, b1, b0; \
+ vorr a1, a1, a2; vorr b1, b1, b2; veor a0, a0, a3; veor b0, b0, b3; \
+ veor a1, a1, a4; veor b1, b1, b4;\
+ veor a0, a1; veor b0, b1;
+
+#define SBOX4(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ veor a1, a1, a3; veor b1, b1, b3; vmvn a3, a3; vmvn b3, b3; \
+ veor a2, a2, a3; veor b2, b2, b3; veor a3, a3, a0; veor b3, b3, b0; \
+ vmov a4, a1; vmov b4, b1; vand a1, a1, a3; vand b1, b1, b3; \
+ veor a1, a1, a2; veor b1, b1, b2; veor a4, a4, a3; veor b4, b4, b3; \
+ veor a0, a0, a4; veor b0, b0, b4; vand a2, a2, a4; vand b2, b2, b4; \
+ veor a2, a2, a0; veor b2, b2, b0; vand a0, a0, a1; vand b0, b0, b1; \
+ veor a3, a3, a0; veor b3, b3, b0; vorr a4, a4, a1; vorr b4, b4, b1; \
+ veor a4, a4, a0; veor b4, b4, b0; vorr a0, a0, a3; vorr b0, b0, b3; \
+ veor a0, a0, a2; veor b0, b0, b2; vand a2, a2, a3; vand b2, b2, b3; \
+ vmvn a0, a0; vmvn b0, b0; veor a4, a2; veor b4, b2;
+
+#define SBOX4_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vmov a4, a2; vmov b4, b2; vand a2, a2, a3; vand b2, b2, b3; \
+ veor a2, a2, a1; veor b2, b2, b1; vorr a1, a1, a3; vorr b1, b1, b3; \
+ vand a1, a1, a0; vand b1, b1, b0; veor a4, a4, a2; veor b4, b4, b2; \
+ veor a4, a4, a1; veor b4, b4, b1; vand a1, a1, a2; vand b1, b1, b2; \
+ vmvn a0, a0; vmvn b0, b0; veor a3, a3, a4; veor b3, b3, b4; \
+ veor a1, a1, a3; veor b1, b1, b3; vand a3, a3, a0; vand b3, b3, b0; \
+ veor a3, a3, a2; veor b3, b3, b2; veor a0, a0, a1; veor b0, b0, b1; \
+ vand a2, a2, a0; vand b2, b2, b0; veor a3, a3, a0; veor b3, b3, b0; \
+ veor a2, a2, a4; veor b2, b2, b4;\
+ vorr a2, a2, a3; vorr b2, b2, b3; veor a3, a3, a0; veor b3, b3, b0; \
+ veor a2, a1; veor b2, b1;
+
+#define SBOX5(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ veor a0, a0, a1; veor b0, b0, b1; veor a1, a1, a3; veor b1, b1, b3; \
+ vmvn a3, a3; vmvn b3, b3; vmov a4, a1; vmov b4, b1; \
+ vand a1, a1, a0; vand b1, b1, b0; veor a2, a2, a3; veor b2, b2, b3; \
+ veor a1, a1, a2; veor b1, b1, b2; vorr a2, a2, a4; vorr b2, b2, b4; \
+ veor a4, a4, a3; veor b4, b4, b3; vand a3, a3, a1; vand b3, b3, b1; \
+ veor a3, a3, a0; veor b3, b3, b0; veor a4, a4, a1; veor b4, b4, b1; \
+ veor a4, a4, a2; veor b4, b4, b2; veor a2, a2, a0; veor b2, b2, b0; \
+ vand a0, a0, a3; vand b0, b0, b3; vmvn a2, a2; vmvn b2, b2; \
+ veor a0, a0, a4; veor b0, b0, b4; vorr a4, a4, a3; vorr b4, b4, b3; \
+ veor a2, a4; veor b2, b4;
+
+#define SBOX5_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vmvn a1, a1; vmvn b1, b1; vmov a4, a3; vmov b4, b3; \
+ veor a2, a2, a1; veor b2, b2, b1; vorr a3, a3, a0; vorr b3, b3, b0; \
+ veor a3, a3, a2; veor b3, b3, b2; vorr a2, a2, a1; vorr b2, b2, b1; \
+ vand a2, a2, a0; vand b2, b2, b0; veor a4, a4, a3; veor b4, b4, b3; \
+ veor a2, a2, a4; veor b2, b2, b4; vorr a4, a4, a0; vorr b4, b4, b0; \
+ veor a4, a4, a1; veor b4, b4, b1; vand a1, a1, a2; vand b1, b1, b2; \
+ veor a1, a1, a3; veor b1, b1, b3; veor a4, a4, a2; veor b4, b4, b2; \
+ vand a3, a3, a4; vand b3, b3, b4; veor a4, a4, a1; veor b4, b4, b1; \
+ veor a3, a3, a4; veor b3, b3, b4; vmvn a4, a4; vmvn b4, b4; \
+ veor a3, a0; veor b3, b0;
+
+#define SBOX6(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vmvn a2, a2; vmvn b2, b2; vmov a4, a3; vmov b4, b3; \
+ vand a3, a3, a0; vand b3, b3, b0; veor a0, a0, a4; veor b0, b0, b4; \
+ veor a3, a3, a2; veor b3, b3, b2; vorr a2, a2, a4; vorr b2, b2, b4; \
+ veor a1, a1, a3; veor b1, b1, b3; veor a2, a2, a0; veor b2, b2, b0; \
+ vorr a0, a0, a1; vorr b0, b0, b1; veor a2, a2, a1; veor b2, b2, b1; \
+ veor a4, a4, a0; veor b4, b4, b0; vorr a0, a0, a3; vorr b0, b0, b3; \
+ veor a0, a0, a2; veor b0, b0, b2; veor a4, a4, a3; veor b4, b4, b3; \
+ veor a4, a4, a0; veor b4, b4, b0; vmvn a3, a3; vmvn b3, b3; \
+ vand a2, a2, a4; vand b2, b2, b4;\
+ veor a2, a3; veor b2, b3;
+
+#define SBOX6_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ veor a0, a0, a2; veor b0, b0, b2; vmov a4, a2; vmov b4, b2; \
+ vand a2, a2, a0; vand b2, b2, b0; veor a4, a4, a3; veor b4, b4, b3; \
+ vmvn a2, a2; vmvn b2, b2; veor a3, a3, a1; veor b3, b3, b1; \
+ veor a2, a2, a3; veor b2, b2, b3; vorr a4, a4, a0; vorr b4, b4, b0; \
+ veor a0, a0, a2; veor b0, b0, b2; veor a3, a3, a4; veor b3, b3, b4; \
+ veor a4, a4, a1; veor b4, b4, b1; vand a1, a1, a3; vand b1, b1, b3; \
+ veor a1, a1, a0; veor b1, b1, b0; veor a0, a0, a3; veor b0, b0, b3; \
+ vorr a0, a0, a2; vorr b0, b0, b2; veor a3, a3, a1; veor b3, b3, b1; \
+ veor a4, a0; veor b4, b0;
+
+#define SBOX7(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vmov a4, a1; vmov b4, b1; vorr a1, a1, a2; vorr b1, b1, b2; \
+ veor a1, a1, a3; veor b1, b1, b3; veor a4, a4, a2; veor b4, b4, b2; \
+ veor a2, a2, a1; veor b2, b2, b1; vorr a3, a3, a4; vorr b3, b3, b4; \
+ vand a3, a3, a0; vand b3, b3, b0; veor a4, a4, a2; veor b4, b4, b2; \
+ veor a3, a3, a1; veor b3, b3, b1; vorr a1, a1, a4; vorr b1, b1, b4; \
+ veor a1, a1, a0; veor b1, b1, b0; vorr a0, a0, a4; vorr b0, b0, b4; \
+ veor a0, a0, a2; veor b0, b0, b2; veor a1, a1, a4; veor b1, b1, b4; \
+ veor a2, a2, a1; veor b2, b2, b1; vand a1, a1, a0; vand b1, b1, b0; \
+ veor a1, a1, a4; veor b1, b1, b4; vmvn a2, a2; vmvn b2, b2; \
+ vorr a2, a2, a0; vorr b2, b2, b0;\
+ veor a4, a2; veor b4, b2;
+
+#define SBOX7_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vmov a4, a2; vmov b4, b2; veor a2, a2, a0; veor b2, b2, b0; \
+ vand a0, a0, a3; vand b0, b0, b3; vorr a4, a4, a3; vorr b4, b4, b3; \
+ vmvn a2, a2; vmvn b2, b2; veor a3, a3, a1; veor b3, b3, b1; \
+ vorr a1, a1, a0; vorr b1, b1, b0; veor a0, a0, a2; veor b0, b0, b2; \
+ vand a2, a2, a4; vand b2, b2, b4; vand a3, a3, a4; vand b3, b3, b4; \
+ veor a1, a1, a2; veor b1, b1, b2; veor a2, a2, a0; veor b2, b2, b0; \
+ vorr a0, a0, a2; vorr b0, b0, b2; veor a4, a4, a1; veor b4, b4, b1; \
+ veor a0, a0, a3; veor b0, b0, b3; veor a3, a3, a4; veor b3, b3, b4; \
+ vorr a4, a4, a0; vorr b4, b4, b0; veor a3, a3, a2; veor b3, b3, b2; \
+ veor a4, a2; veor b4, b2;
+
+/* Apply SBOX number WHICH to the block. */
+#define SBOX(which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ SBOX##which (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4)
+
+/* Apply inverse SBOX number WHICH to the block. */
+#define SBOX_INVERSE(which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ SBOX##which##_INVERSE (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4)
+
+/* XOR round key into block state in a0,a1,a2,a3. a4 used as temporary. */
+#define BLOCK_XOR_KEY(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vdup.32 RT3, RT0d0[0]; \
+ vdup.32 RT1, RT0d0[1]; \
+ vdup.32 RT2, RT0d1[0]; \
+ vdup.32 RT0, RT0d1[1]; \
+ veor a0, a0, RT3; veor b0, b0, RT3; \
+ veor a1, a1, RT1; veor b1, b1, RT1; \
+ veor a2, a2, RT2; veor b2, b2, RT2; \
+ veor a3, a3, RT0; veor b3, b3, RT0;
+
+#define BLOCK_LOAD_KEY_ENC() \
+ vld1.8 {RT0d0, RT0d1}, [RROUND]!;
+
+#define BLOCK_LOAD_KEY_DEC() \
+ vld1.8 {RT0d0, RT0d1}, [RROUND]; \
+ sub RROUND, RROUND, #16
+
+/* Apply the linear transformation to BLOCK. */
+#define LINEAR_TRANSFORMATION(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vshl.u32 a4, a0, #13; vshl.u32 b4, b0, #13; \
+ vshr.u32 a0, a0, #(32-13); vshr.u32 b0, b0, #(32-13); \
+ veor a0, a0, a4; veor b0, b0, b4; \
+ vshl.u32 a4, a2, #3; vshl.u32 b4, b2, #3; \
+ vshr.u32 a2, a2, #(32-3); vshr.u32 b2, b2, #(32-3); \
+ veor a2, a2, a4; veor b2, b2, b4; \
+ veor a1, a0, a1; veor b1, b0, b1; \
+ veor a1, a2, a1; veor b1, b2, b1; \
+ vshl.u32 a4, a0, #3; vshl.u32 b4, b0, #3; \
+ veor a3, a2, a3; veor b3, b2, b3; \
+ veor a3, a4, a3; veor b3, b4, b3; \
+ vshl.u32 a4, a1, #1; vshl.u32 b4, b1, #1; \
+ vshr.u32 a1, a1, #(32-1); vshr.u32 b1, b1, #(32-1); \
+ veor a1, a1, a4; veor b1, b1, b4; \
+ vshl.u32 a4, a3, #7; vshl.u32 b4, b3, #7; \
+ vshr.u32 a3, a3, #(32-7); vshr.u32 b3, b3, #(32-7); \
+ veor a3, a3, a4; veor b3, b3, b4; \
+ veor a0, a1, a0; veor b0, b1, b0; \
+ veor a0, a3, a0; veor b0, b3, b0; \
+ vshl.u32 a4, a1, #7; vshl.u32 b4, b1, #7; \
+ veor a2, a3, a2; veor b2, b3, b2; \
+ veor a2, a4, a2; veor b2, b4, b2; \
+ vshl.u32 a4, a0, #5; vshl.u32 b4, b0, #5; \
+ vshr.u32 a0, a0, #(32-5); vshr.u32 b0, b0, #(32-5); \
+ veor a0, a0, a4; veor b0, b0, b4; \
+ vshl.u32 a4, a2, #22; vshl.u32 b4, b2, #22; \
+ vshr.u32 a2, a2, #(32-22); vshr.u32 b2, b2, #(32-22); \
+ veor a2, a2, a4; veor b2, b2, b4;
+
+/* Apply the inverse linear transformation to BLOCK. */
+#define LINEAR_TRANSFORMATION_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+ vshr.u32 a4, a2, #22; vshr.u32 b4, b2, #22; \
+ vshl.u32 a2, a2, #(32-22); vshl.u32 b2, b2, #(32-22); \
+ veor a2, a2, a4; veor b2, b2, b4; \
+ vshr.u32 a4, a0, #5; vshr.u32 b4, b0, #5; \
+ vshl.u32 a0, a0, #(32-5); vshl.u32 b0, b0, #(32-5); \
+ veor a0, a0, a4; veor b0, b0, b4; \
+ vshl.u32 a4, a1, #7; vshl.u32 b4, b1, #7; \
+ veor a2, a3, a2; veor b2, b3, b2; \
+ veor a2, a4, a2; veor b2, b4, b2; \
+ veor a0, a1, a0; veor b0, b1, b0; \
+ veor a0, a3, a0; veor b0, b3, b0; \
+ vshr.u32 a4, a3, #7; vshr.u32 b4, b3, #7; \
+ vshl.u32 a3, a3, #(32-7); vshl.u32 b3, b3, #(32-7); \
+ veor a3, a3, a4; veor b3, b3, b4; \
+ vshr.u32 a4, a1, #1; vshr.u32 b4, b1, #1; \
+ vshl.u32 a1, a1, #(32-1); vshl.u32 b1, b1, #(32-1); \
+ veor a1, a1, a4; veor b1, b1, b4; \
+ vshl.u32 a4, a0, #3; vshl.u32 b4, b0, #3; \
+ veor a3, a2, a3; veor b3, b2, b3; \
+ veor a3, a4, a3; veor b3, b4, b3; \
+ veor a1, a0, a1; veor b1, b0, b1; \
+ veor a1, a2, a1; veor b1, b2, b1; \
+ vshr.u32 a4, a2, #3; vshr.u32 b4, b2, #3; \
+ vshl.u32 a2, a2, #(32-3); vshl.u32 b2, b2, #(32-3); \
+ veor a2, a2, a4; veor b2, b2, b4; \
+ vshr.u32 a4, a0, #13; vshr.u32 b4, b0, #13; \
+ vshl.u32 a0, a0, #(32-13); vshl.u32 b0, b0, #(32-13); \
+ veor a0, a0, a4; veor b0, b0, b4;
+
+/* Apply a Serpent round to eight parallel blocks. The `round' argument is
+   informational only; round keys are loaded in sequence through RROUND. */
+#define ROUND(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
+ b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
+ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
+ BLOCK_LOAD_KEY_ENC (); \
+ SBOX (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
+ LINEAR_TRANSFORMATION (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4);
+
+/* Apply the last Serpent round to eight parallel blocks. The `round' argument
+   is informational only; round keys are loaded in sequence through RROUND. */
+#define ROUND_LAST(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
+ b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
+ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
+ BLOCK_LOAD_KEY_ENC (); \
+ SBOX (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
+ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4);
+
+/* Apply an inverse Serpent round to eight parallel blocks. The `round'
+   argument is informational only; round keys are loaded through RROUND. */
+#define ROUND_INVERSE(round, which, a0, a1, a2, a3, a4, \
+ na0, na1, na2, na3, na4, \
+ b0, b1, b2, b3, b4, \
+ nb0, nb1, nb2, nb3, nb4) \
+ LINEAR_TRANSFORMATION_INVERSE (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
+ SBOX_INVERSE (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
+ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); \
+ BLOCK_LOAD_KEY_DEC ();
+
+/* Apply the first inverse Serpent round to eight parallel blocks. The `round'
+   argument is informational only; round keys are loaded through RROUND. */
+#define ROUND_FIRST_INVERSE(round, which, a0, a1, a2, a3, a4, \
+ na0, na1, na2, na3, na4, \
+ b0, b1, b2, b3, b4, \
+ nb0, nb1, nb2, nb3, nb4) \
+ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
+ BLOCK_LOAD_KEY_DEC (); \
+ SBOX_INVERSE (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
+ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); \
+ BLOCK_LOAD_KEY_DEC ();
+
+.align 3
+.type __serpent_enc_blk8,%function;
+__serpent_enc_blk8:
+ /* input:
+ * r0: round key pointer
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext
+ * blocks
+ * output:
+ * RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: eight parallel
+ * ciphertext blocks
+ */
+
+ transpose_4x4(RA0, RA1, RA2, RA3);
+ BLOCK_LOAD_KEY_ENC ();
+ transpose_4x4(RB0, RB1, RB2, RB3);
+
+ ROUND (0, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
+ RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
+ ROUND (1, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
+ RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
+ ROUND (2, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
+ RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
+ ROUND (3, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
+ RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
+ ROUND (4, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
+ RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
+ ROUND (5, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
+ RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
+ ROUND (6, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
+ RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
+ ROUND (7, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
+ RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);
+ ROUND (8, 0, RA4, RA1, RA2, RA0, RA3, RA1, RA3, RA2, RA4, RA0,
+ RB4, RB1, RB2, RB0, RB3, RB1, RB3, RB2, RB4, RB0);
+ ROUND (9, 1, RA1, RA3, RA2, RA4, RA0, RA2, RA1, RA4, RA3, RA0,
+ RB1, RB3, RB2, RB4, RB0, RB2, RB1, RB4, RB3, RB0);
+ ROUND (10, 2, RA2, RA1, RA4, RA3, RA0, RA4, RA3, RA1, RA0, RA2,
+ RB2, RB1, RB4, RB3, RB0, RB4, RB3, RB1, RB0, RB2);
+ ROUND (11, 3, RA4, RA3, RA1, RA0, RA2, RA3, RA1, RA0, RA2, RA4,
+ RB4, RB3, RB1, RB0, RB2, RB3, RB1, RB0, RB2, RB4);
+ ROUND (12, 4, RA3, RA1, RA0, RA2, RA4, RA1, RA4, RA3, RA2, RA0,
+ RB3, RB1, RB0, RB2, RB4, RB1, RB4, RB3, RB2, RB0);
+ ROUND (13, 5, RA1, RA4, RA3, RA2, RA0, RA4, RA2, RA1, RA3, RA0,
+ RB1, RB4, RB3, RB2, RB0, RB4, RB2, RB1, RB3, RB0);
+ ROUND (14, 6, RA4, RA2, RA1, RA3, RA0, RA4, RA2, RA0, RA1, RA3,
+ RB4, RB2, RB1, RB3, RB0, RB4, RB2, RB0, RB1, RB3);
+ ROUND (15, 7, RA4, RA2, RA0, RA1, RA3, RA3, RA1, RA2, RA4, RA0,
+ RB4, RB2, RB0, RB1, RB3, RB3, RB1, RB2, RB4, RB0);
+ ROUND (16, 0, RA3, RA1, RA2, RA4, RA0, RA1, RA0, RA2, RA3, RA4,
+ RB3, RB1, RB2, RB4, RB0, RB1, RB0, RB2, RB3, RB4);
+ ROUND (17, 1, RA1, RA0, RA2, RA3, RA4, RA2, RA1, RA3, RA0, RA4,
+ RB1, RB0, RB2, RB3, RB4, RB2, RB1, RB3, RB0, RB4);
+ ROUND (18, 2, RA2, RA1, RA3, RA0, RA4, RA3, RA0, RA1, RA4, RA2,
+ RB2, RB1, RB3, RB0, RB4, RB3, RB0, RB1, RB4, RB2);
+ ROUND (19, 3, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA4, RA2, RA3,
+ RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB4, RB2, RB3);
+ ROUND (20, 4, RA0, RA1, RA4, RA2, RA3, RA1, RA3, RA0, RA2, RA4,
+ RB0, RB1, RB4, RB2, RB3, RB1, RB3, RB0, RB2, RB4);
+ ROUND (21, 5, RA1, RA3, RA0, RA2, RA4, RA3, RA2, RA1, RA0, RA4,
+ RB1, RB3, RB0, RB2, RB4, RB3, RB2, RB1, RB0, RB4);
+ ROUND (22, 6, RA3, RA2, RA1, RA0, RA4, RA3, RA2, RA4, RA1, RA0,
+ RB3, RB2, RB1, RB0, RB4, RB3, RB2, RB4, RB1, RB0);
+ ROUND (23, 7, RA3, RA2, RA4, RA1, RA0, RA0, RA1, RA2, RA3, RA4,
+ RB3, RB2, RB4, RB1, RB0, RB0, RB1, RB2, RB3, RB4);
+ ROUND (24, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
+ RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
+ ROUND (25, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
+ RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
+ ROUND (26, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
+ RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
+ ROUND (27, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
+ RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
+ ROUND (28, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
+ RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
+ ROUND (29, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
+ RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
+ ROUND (30, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
+ RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
+ ROUND_LAST (31, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
+ RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);
+
+ transpose_4x4(RA4, RA1, RA2, RA0);
+ transpose_4x4(RB4, RB1, RB2, RB0);
+
+ bx lr;
+.size __serpent_enc_blk8,.-__serpent_enc_blk8;
+
+.align 3
+.type __serpent_dec_blk8,%function;
+__serpent_dec_blk8:
+ /* input:
+ * r0: round key pointer
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
+ * ciphertext blocks
+ * output:
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext
+ * blocks
+ */
+
+ add RROUND, RROUND, #(32*16);
+
+ transpose_4x4(RA0, RA1, RA2, RA3);
+ BLOCK_LOAD_KEY_DEC ();
+ transpose_4x4(RB0, RB1, RB2, RB3);
+
+ ROUND_FIRST_INVERSE (31, 7, RA0, RA1, RA2, RA3, RA4,
+ RA3, RA0, RA1, RA4, RA2,
+ RB0, RB1, RB2, RB3, RB4,
+ RB3, RB0, RB1, RB4, RB2);
+ ROUND_INVERSE (30, 6, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA2, RA4, RA3,
+ RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB2, RB4, RB3);
+ ROUND_INVERSE (29, 5, RA0, RA1, RA2, RA4, RA3, RA1, RA3, RA4, RA2, RA0,
+ RB0, RB1, RB2, RB4, RB3, RB1, RB3, RB4, RB2, RB0);
+ ROUND_INVERSE (28, 4, RA1, RA3, RA4, RA2, RA0, RA1, RA2, RA4, RA0, RA3,
+ RB1, RB3, RB4, RB2, RB0, RB1, RB2, RB4, RB0, RB3);
+ ROUND_INVERSE (27, 3, RA1, RA2, RA4, RA0, RA3, RA4, RA2, RA0, RA1, RA3,
+ RB1, RB2, RB4, RB0, RB3, RB4, RB2, RB0, RB1, RB3);
+ ROUND_INVERSE (26, 2, RA4, RA2, RA0, RA1, RA3, RA2, RA3, RA0, RA1, RA4,
+ RB4, RB2, RB0, RB1, RB3, RB2, RB3, RB0, RB1, RB4);
+ ROUND_INVERSE (25, 1, RA2, RA3, RA0, RA1, RA4, RA4, RA2, RA1, RA0, RA3,
+ RB2, RB3, RB0, RB1, RB4, RB4, RB2, RB1, RB0, RB3);
+ ROUND_INVERSE (24, 0, RA4, RA2, RA1, RA0, RA3, RA4, RA3, RA2, RA0, RA1,
+ RB4, RB2, RB1, RB0, RB3, RB4, RB3, RB2, RB0, RB1);
+ ROUND_INVERSE (23, 7, RA4, RA3, RA2, RA0, RA1, RA0, RA4, RA3, RA1, RA2,
+ RB4, RB3, RB2, RB0, RB1, RB0, RB4, RB3, RB1, RB2);
+ ROUND_INVERSE (22, 6, RA0, RA4, RA3, RA1, RA2, RA4, RA3, RA2, RA1, RA0,
+ RB0, RB4, RB3, RB1, RB2, RB4, RB3, RB2, RB1, RB0);
+ ROUND_INVERSE (21, 5, RA4, RA3, RA2, RA1, RA0, RA3, RA0, RA1, RA2, RA4,
+ RB4, RB3, RB2, RB1, RB0, RB3, RB0, RB1, RB2, RB4);
+ ROUND_INVERSE (20, 4, RA3, RA0, RA1, RA2, RA4, RA3, RA2, RA1, RA4, RA0,
+ RB3, RB0, RB1, RB2, RB4, RB3, RB2, RB1, RB4, RB0);
+ ROUND_INVERSE (19, 3, RA3, RA2, RA1, RA4, RA0, RA1, RA2, RA4, RA3, RA0,
+ RB3, RB2, RB1, RB4, RB0, RB1, RB2, RB4, RB3, RB0);
+ ROUND_INVERSE (18, 2, RA1, RA2, RA4, RA3, RA0, RA2, RA0, RA4, RA3, RA1,
+ RB1, RB2, RB4, RB3, RB0, RB2, RB0, RB4, RB3, RB1);
+ ROUND_INVERSE (17, 1, RA2, RA0, RA4, RA3, RA1, RA1, RA2, RA3, RA4, RA0,
+ RB2, RB0, RB4, RB3, RB1, RB1, RB2, RB3, RB4, RB0);
+ ROUND_INVERSE (16, 0, RA1, RA2, RA3, RA4, RA0, RA1, RA0, RA2, RA4, RA3,
+ RB1, RB2, RB3, RB4, RB0, RB1, RB0, RB2, RB4, RB3);
+ ROUND_INVERSE (15, 7, RA1, RA0, RA2, RA4, RA3, RA4, RA1, RA0, RA3, RA2,
+ RB1, RB0, RB2, RB4, RB3, RB4, RB1, RB0, RB3, RB2);
+ ROUND_INVERSE (14, 6, RA4, RA1, RA0, RA3, RA2, RA1, RA0, RA2, RA3, RA4,
+ RB4, RB1, RB0, RB3, RB2, RB1, RB0, RB2, RB3, RB4);
+ ROUND_INVERSE (13, 5, RA1, RA0, RA2, RA3, RA4, RA0, RA4, RA3, RA2, RA1,
+ RB1, RB0, RB2, RB3, RB4, RB0, RB4, RB3, RB2, RB1);
+ ROUND_INVERSE (12, 4, RA0, RA4, RA3, RA2, RA1, RA0, RA2, RA3, RA1, RA4,
+ RB0, RB4, RB3, RB2, RB1, RB0, RB2, RB3, RB1, RB4);
+ ROUND_INVERSE (11, 3, RA0, RA2, RA3, RA1, RA4, RA3, RA2, RA1, RA0, RA4,
+ RB0, RB2, RB3, RB1, RB4, RB3, RB2, RB1, RB0, RB4);
+ ROUND_INVERSE (10, 2, RA3, RA2, RA1, RA0, RA4, RA2, RA4, RA1, RA0, RA3,
+ RB3, RB2, RB1, RB0, RB4, RB2, RB4, RB1, RB0, RB3);
+ ROUND_INVERSE (9, 1, RA2, RA4, RA1, RA0, RA3, RA3, RA2, RA0, RA1, RA4,
+ RB2, RB4, RB1, RB0, RB3, RB3, RB2, RB0, RB1, RB4);
+ ROUND_INVERSE (8, 0, RA3, RA2, RA0, RA1, RA4, RA3, RA4, RA2, RA1, RA0,
+ RB3, RB2, RB0, RB1, RB4, RB3, RB4, RB2, RB1, RB0);
+ ROUND_INVERSE (7, 7, RA3, RA4, RA2, RA1, RA0, RA1, RA3, RA4, RA0, RA2,
+ RB3, RB4, RB2, RB1, RB0, RB1, RB3, RB4, RB0, RB2);
+ ROUND_INVERSE (6, 6, RA1, RA3, RA4, RA0, RA2, RA3, RA4, RA2, RA0, RA1,
+ RB1, RB3, RB4, RB0, RB2, RB3, RB4, RB2, RB0, RB1);
+ ROUND_INVERSE (5, 5, RA3, RA4, RA2, RA0, RA1, RA4, RA1, RA0, RA2, RA3,
+ RB3, RB4, RB2, RB0, RB1, RB4, RB1, RB0, RB2, RB3);
+ ROUND_INVERSE (4, 4, RA4, RA1, RA0, RA2, RA3, RA4, RA2, RA0, RA3, RA1,
+ RB4, RB1, RB0, RB2, RB3, RB4, RB2, RB0, RB3, RB1);
+ ROUND_INVERSE (3, 3, RA4, RA2, RA0, RA3, RA1, RA0, RA2, RA3, RA4, RA1,
+ RB4, RB2, RB0, RB3, RB1, RB0, RB2, RB3, RB4, RB1);
+ ROUND_INVERSE (2, 2, RA0, RA2, RA3, RA4, RA1, RA2, RA1, RA3, RA4, RA0,
+ RB0, RB2, RB3, RB4, RB1, RB2, RB1, RB3, RB4, RB0);
+ ROUND_INVERSE (1, 1, RA2, RA1, RA3, RA4, RA0, RA0, RA2, RA4, RA3, RA1,
+ RB2, RB1, RB3, RB4, RB0, RB0, RB2, RB4, RB3, RB1);
+ ROUND_INVERSE (0, 0, RA0, RA2, RA4, RA3, RA1, RA0, RA1, RA2, RA3, RA4,
+ RB0, RB2, RB4, RB3, RB1, RB0, RB1, RB2, RB3, RB4);
+
+ transpose_4x4(RA0, RA1, RA2, RA3);
+ transpose_4x4(RB0, RB1, RB2, RB3);
+
+ bx lr;
+.size __serpent_dec_blk8,.-__serpent_dec_blk8;
+
+.align 3
+.globl _gcry_serpent_neon_ctr_enc
+.type _gcry_serpent_neon_ctr_enc,%function;
+_gcry_serpent_neon_ctr_enc:
+ /* input:
+ * r0: ctx, CTX
+ * r1: dst (8 blocks)
+ * r2: src (8 blocks)
+ * r3: iv
+ */
+
+ vmov.u8 RT1d0, #0xff; /* u64: -1 */
+ push {r4,lr};
+ vadd.u64 RT2d0, RT1d0, RT1d0; /* u64: -2 */
+ vpush {RA4-RB2};
+
+ /* load IV and byteswap */
+ vld1.8 {RA0}, [r3];
+ vrev64.u8 RT0, RA0; /* be => le */
+ ldr r4, [r3, #8];
+
+ /* construct IVs */
+ vsub.u64 RA2d1, RT0d1, RT2d0; /* +2 */
+ vsub.u64 RA1d1, RT0d1, RT1d0; /* +1 */
+ cmp r4, #-1;
+
+ vsub.u64 RB0d1, RA2d1, RT2d0; /* +4 */
+ vsub.u64 RA3d1, RA2d1, RT1d0; /* +3 */
+ ldr r4, [r3, #12];
+
+ vsub.u64 RB2d1, RB0d1, RT2d0; /* +6 */
+ vsub.u64 RB1d1, RB0d1, RT1d0; /* +5 */
+
+ vsub.u64 RT2d1, RB2d1, RT2d0; /* +8 */
+ vsub.u64 RB3d1, RB2d1, RT1d0; /* +7 */
+
+ vmov RA1d0, RT0d0;
+ vmov RA2d0, RT0d0;
+ vmov RA3d0, RT0d0;
+ vmov RB0d0, RT0d0;
+ rev r4, r4;
+ vmov RB1d0, RT0d0;
+ vmov RB2d0, RT0d0;
+ vmov RB3d0, RT0d0;
+ vmov RT2d0, RT0d0;
+
+ /* check need for handling 64-bit overflow and carry */
+ beq .Ldo_ctr_carry;
+
+.Lctr_carry_done:
+ /* le => be */
+ vrev64.u8 RA1, RA1;
+ vrev64.u8 RA2, RA2;
+ vrev64.u8 RA3, RA3;
+ vrev64.u8 RB0, RB0;
+ vrev64.u8 RT2, RT2;
+ vrev64.u8 RB1, RB1;
+ vrev64.u8 RB2, RB2;
+ vrev64.u8 RB3, RB3;
+ /* store new IV */
+ vst1.8 {RT2}, [r3];
+
+ bl __serpent_enc_blk8;
+
+ vld1.8 {RT0, RT1}, [r2]!;
+ vld1.8 {RT2, RT3}, [r2]!;
+ veor RA4, RA4, RT0;
+ veor RA1, RA1, RT1;
+ vld1.8 {RT0, RT1}, [r2]!;
+ veor RA2, RA2, RT2;
+ veor RA0, RA0, RT3;
+ vld1.8 {RT2, RT3}, [r2]!;
+ veor RB4, RB4, RT0;
+ veor RT0, RT0;
+ veor RB1, RB1, RT1;
+ veor RT1, RT1;
+ veor RB2, RB2, RT2;
+ veor RT2, RT2;
+ veor RB0, RB0, RT3;
+ veor RT3, RT3;
+
+ vst1.8 {RA4}, [r1]!;
+ vst1.8 {RA1}, [r1]!;
+ veor RA1, RA1;
+ vst1.8 {RA2}, [r1]!;
+ veor RA2, RA2;
+ vst1.8 {RA0}, [r1]!;
+ veor RA0, RA0;
+ vst1.8 {RB4}, [r1]!;
+ veor RB4, RB4;
+ vst1.8 {RB1}, [r1]!;
+ vst1.8 {RB2}, [r1]!;
+ vst1.8 {RB0}, [r1]!;
+
+ vpop {RA4-RB2};
+
+ /* clear the used registers */
+ veor RA3, RA3;
+ veor RB3, RB3;
+
+ pop {r4,pc};
+
+.Ldo_ctr_carry:
+ cmp r4, #-8;
+ blo .Lctr_carry_done;
+ beq .Lcarry_RT2;
+
+ cmp r4, #-6;
+ blo .Lcarry_RB3;
+ beq .Lcarry_RB2;
+
+ cmp r4, #-4;
+ blo .Lcarry_RB1;
+ beq .Lcarry_RB0;
+
+ cmp r4, #-2;
+ blo .Lcarry_RA3;
+ beq .Lcarry_RA2;
+
+ vsub.u64 RA1d0, RT1d0;
+.Lcarry_RA2:
+ vsub.u64 RA2d0, RT1d0;
+.Lcarry_RA3:
+ vsub.u64 RA3d0, RT1d0;
+.Lcarry_RB0:
+ vsub.u64 RB0d0, RT1d0;
+.Lcarry_RB1:
+ vsub.u64 RB1d0, RT1d0;
+.Lcarry_RB2:
+ vsub.u64 RB2d0, RT1d0;
+.Lcarry_RB3:
+ vsub.u64 RB3d0, RT1d0;
+.Lcarry_RT2:
+ vsub.u64 RT2d0, RT1d0;
+
+ b .Lctr_carry_done;
+.size _gcry_serpent_neon_ctr_enc,.-_gcry_serpent_neon_ctr_enc;
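
The CTR path above converts the big-endian IV to machine order, derives eight consecutive counter values (with explicit handling of the 64-bit carry into the high half), and writes the incremented counter back. A minimal scalar C sketch of that counter construction, purely illustrative and not part of this file (the helper name is hypothetical):

    #include <stdint.h>

    /* Hypothetical helper: derive eight consecutive 128-bit big-endian counter
       blocks from IV and store IV+8 back, mirroring the 8-way CTR code above. */
    static void ctr_make_blocks(uint8_t iv[16], uint8_t blocks[8][16])
    {
        uint64_t hi = 0, lo = 0;
        int i, j;

        for (i = 0; i < 8; i++) {             /* big-endian load */
            hi = (hi << 8) | iv[i];
            lo = (lo << 8) | iv[8 + i];
        }

        for (i = 0; i < 8; i++) {
            uint64_t bhi = hi, blo = lo;
            for (j = 7; j >= 0; j--) {         /* big-endian store */
                blocks[i][j]     = (uint8_t)bhi; bhi >>= 8;
                blocks[i][8 + j] = (uint8_t)blo; blo >>= 8;
            }
            if (++lo == 0)                     /* 64-bit carry into high half */
                hi++;
        }

        for (j = 7; j >= 0; j--) {             /* store new IV (counter + 8) */
            iv[j]     = (uint8_t)hi; hi >>= 8;
            iv[8 + j] = (uint8_t)lo; lo >>= 8;
        }
    }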
+
+.align 3
+.globl _gcry_serpent_neon_cfb_dec
+.type _gcry_serpent_neon_cfb_dec,%function;
+_gcry_serpent_neon_cfb_dec:
+ /* input:
+ * r0: ctx, CTX
+ * r1: dst (8 blocks)
+ * r2: src (8 blocks)
+ * r3: iv
+ */
+
+ push {lr};
+ vpush {RA4-RB2};
+
+ /* Load input */
+ vld1.8 {RA0}, [r3];
+ vld1.8 {RA1, RA2}, [r2]!;
+ vld1.8 {RA3}, [r2]!;
+ vld1.8 {RB0}, [r2]!;
+ vld1.8 {RB1, RB2}, [r2]!;
+ vld1.8 {RB3}, [r2]!;
+
+ /* Update IV */
+ vld1.8 {RT0}, [r2]!;
+ vst1.8 {RT0}, [r3];
+ mov r3, lr;
+ sub r2, r2, #(8*16);
+
+ bl __serpent_enc_blk8;
+
+ vld1.8 {RT0, RT1}, [r2]!;
+ vld1.8 {RT2, RT3}, [r2]!;
+ veor RA4, RA4, RT0;
+ veor RA1, RA1, RT1;
+ vld1.8 {RT0, RT1}, [r2]!;
+ veor RA2, RA2, RT2;
+ veor RA0, RA0, RT3;
+ vld1.8 {RT2, RT3}, [r2]!;
+ veor RB4, RB4, RT0;
+ veor RT0, RT0;
+ veor RB1, RB1, RT1;
+ veor RT1, RT1;
+ veor RB2, RB2, RT2;
+ veor RT2, RT2;
+ veor RB0, RB0, RT3;
+ veor RT3, RT3;
+
+ vst1.8 {RA4}, [r1]!;
+ vst1.8 {RA1}, [r1]!;
+ veor RA1, RA1;
+ vst1.8 {RA2}, [r1]!;
+ veor RA2, RA2;
+ vst1.8 {RA0}, [r1]!;
+ veor RA0, RA0;
+ vst1.8 {RB4}, [r1]!;
+ veor RB4, RB4;
+ vst1.8 {RB1}, [r1]!;
+ vst1.8 {RB2}, [r1]!;
+ vst1.8 {RB0}, [r1]!;
+
+ vpop {RA4-RB2};
+
+ /* clear the used registers */
+ veor RA3, RA3;
+ veor RB3, RB3;
+
+ pop {pc};
+.size _gcry_serpent_neon_cfb_dec,.-_gcry_serpent_neon_cfb_dec;
+
+.align 3
+.globl _gcry_serpent_neon_cbc_dec
+.type _gcry_serpent_neon_cbc_dec,%function;
+_gcry_serpent_neon_cbc_dec:
+ /* input:
+ * r0: ctx, CTX
+ * r1: dst (8 blocks)
+ * r2: src (8 blocks)
+ * r3: iv
+ */
+
+ push {lr};
+ vpush {RA4-RB2};
+
+ vld1.8 {RA0, RA1}, [r2]!;
+ vld1.8 {RA2, RA3}, [r2]!;
+ vld1.8 {RB0, RB1}, [r2]!;
+ vld1.8 {RB2, RB3}, [r2]!;
+ sub r2, r2, #(8*16);
+
+ bl __serpent_dec_blk8;
+
+ vld1.8 {RB4}, [r3];
+ vld1.8 {RT0, RT1}, [r2]!;
+ vld1.8 {RT2, RT3}, [r2]!;
+ veor RA0, RA0, RB4;
+ veor RA1, RA1, RT0;
+ veor RA2, RA2, RT1;
+ vld1.8 {RT0, RT1}, [r2]!;
+ veor RA3, RA3, RT2;
+ veor RB0, RB0, RT3;
+ vld1.8 {RT2, RT3}, [r2]!;
+ veor RB1, RB1, RT0;
+ veor RT0, RT0;
+ veor RB2, RB2, RT1;
+ veor RT1, RT1;
+ veor RB3, RB3, RT2;
+ veor RT2, RT2;
+ vst1.8 {RT3}, [r3]; /* store new IV */
+ veor RT3, RT3;
+
+ vst1.8 {RA0, RA1}, [r1]!;
+ veor RA0, RA0;
+ veor RA1, RA1;
+ vst1.8 {RA2, RA3}, [r1]!;
+ veor RA2, RA2;
+ vst1.8 {RB0, RB1}, [r1]!;
+ veor RA3, RA3;
+ vst1.8 {RB2, RB3}, [r1]!;
+ veor RB3, RB3;
+
+ vpop {RA4-RB2};
+
+ /* clear the used registers */
+ veor RB4, RB4;
+
+ pop {pc};
+.size _gcry_serpent_neon_cbc_dec,.-_gcry_serpent_neon_cbc_dec;
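
The CBC-decryption routine above implements the usual chaining rule P_i = D_K(C_i) xor C_{i-1}, with the IV standing in for C_0 and the last ciphertext block becoming the new IV. In scalar C terms this is roughly the following sketch (block_decrypt is a placeholder, not the libgcrypt interface):

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical single-block decryption primitive. */
    void block_decrypt(const void *ctx, uint8_t out[16], const uint8_t in[16]);

    static void cbc_dec_blocks(const void *ctx, uint8_t *dst, const uint8_t *src,
                               size_t nblocks, uint8_t iv[16])
    {
        uint8_t prev[16], tmp[16];
        size_t n, i;

        memcpy(prev, iv, 16);
        for (n = 0; n < nblocks; n++) {
            const uint8_t *c = src + n * 16;

            block_decrypt(ctx, tmp, c);
            for (i = 0; i < 16; i++)
                dst[n * 16 + i] = tmp[i] ^ prev[i];
            memcpy(prev, c, 16);        /* next chaining value */
        }
        memcpy(iv, prev, 16);           /* store new IV */
    }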
+
+.align 3
+.globl _gcry_serpent_neon_ocb_enc
+.type _gcry_serpent_neon_ocb_enc,%function;
+_gcry_serpent_neon_ocb_enc:
+ /* input:
+ * r0 : ctx, CTX
+ * r1 : dst (8 blocks)
+ * r2 : src (8 blocks)
+ * r3 : offset
+ * sp+0: checksum
+ * sp+4: L pointers (void *L[8])
+ */
+
+ push {r4-r11, ip, lr};
+ add ip, sp, #(10*4);
+
+ vpush {RA4-RB2};
+
+ ldm ip, {r4, lr};
+
+ vld1.8 {RT0}, [r3];
+ vld1.8 {RT1}, [r4];
+
+ /* Load L pointers */
+ ldm lr!, {r5, r6, r7, r8};
+ ldm lr, {r9, r10, r11, ip};
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+
+ vld1.8 {RA0, RA1}, [r2]!;
+ vld1.8 {RA2, RA3}, [r2]!;
+ vld1.8 {RB0, RB1}, [r2]!;
+ vld1.8 {RB2, RB3}, [r2];
+
+#define OCB_INPUT(lreg, vreg) \
+ vld1.8 {RT3}, [lreg]; \
+ veor RT0, RT3; \
+ veor RT1, vreg; \
+ veor vreg, RT0; \
+ vst1.8 {RT0}, [r1]!;
+
+ OCB_INPUT(r5, RA0);
+ OCB_INPUT(r6, RA1);
+ OCB_INPUT(r7, RA2);
+ OCB_INPUT(r8, RA3);
+ OCB_INPUT(r9, RB0);
+ OCB_INPUT(r10, RB1);
+ OCB_INPUT(r11, RB2);
+ OCB_INPUT(ip, RB3);
+#undef OCB_INPUT
+
+ sub r1, r1, #(8*16);
+ vst1.8 {RT0}, [r3];
+ vst1.8 {RT1}, [r4];
+ mov r2, r1;
+
+ bl __serpent_enc_blk8;
+
+ vld1.8 {RT0, RT1}, [r1]!;
+ veor RT0, RA4, RT0;
+ veor RT1, RA1, RT1;
+ vld1.8 {RT2, RT3}, [r1]!;
+ vst1.8 {RT0, RT1}, [r2]!;
+ veor RT2, RA2, RT2;
+ veor RT3, RA0, RT3;
+ vld1.8 {RT0, RT1}, [r1]!;
+ vst1.8 {RT2, RT3}, [r2]!;
+ veor RT0, RB4, RT0;
+ veor RT1, RB1, RT1;
+ vld1.8 {RT2, RT3}, [r1]!;
+ vst1.8 {RT0, RT1}, [r2]!;
+ veor RT2, RB2, RT2;
+ veor RT3, RB0, RT3;
+ vst1.8 {RT2, RT3}, [r2]!;
+
+ vpop {RA4-RB2};
+
+ /* clear the used registers */
+ veor RA3, RA3;
+ veor RB3, RB3;
+
+ pop {r4-r11, ip, pc};
+.size _gcry_serpent_neon_ocb_enc,.-_gcry_serpent_neon_ocb_enc;
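
The OCB encryption routine above applies the per-block recurrences given in its comments: advance the offset by L_{ntz(i)}, accumulate the plaintext into the checksum, and encrypt the offset-masked block (the assembly masks all eight inputs first and then runs one 8-way encryption). A rough single-block C sketch, where block_encrypt and the L-value argument are placeholders rather than the actual interface:

    #include <stdint.h>

    /* Hypothetical single-block encryption primitive. */
    void block_encrypt(const void *ctx, uint8_t out[16], const uint8_t in[16]);

    static void xor16(uint8_t *d, const uint8_t *s)
    {
        int i;
        for (i = 0; i < 16; i++)
            d[i] ^= s[i];
    }

    /* One OCB block: Offset_i = Offset_{i-1} ^ L_ntz(i),
       Checksum_i = Checksum_{i-1} ^ P_i,
       C_i = Offset_i ^ E_K(P_i ^ Offset_i). */
    static void ocb_enc_block(const void *ctx, uint8_t c[16], const uint8_t p[16],
                              uint8_t offset[16], uint8_t checksum[16],
                              const uint8_t l_ntz_i[16])
    {
        uint8_t tmp[16];
        int i;

        xor16(offset, l_ntz_i);            /* advance the offset */
        xor16(checksum, p);                /* fold plaintext into checksum */

        for (i = 0; i < 16; i++)
            tmp[i] = p[i] ^ offset[i];     /* mask input */
        block_encrypt(ctx, c, tmp);
        xor16(c, offset);                  /* unmask output */
    }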
+
+.align 3
+.globl _gcry_serpent_neon_ocb_dec
+.type _gcry_serpent_neon_ocb_dec,%function;
+_gcry_serpent_neon_ocb_dec:
+ /* input:
+ * r0 : ctx, CTX
+ * r1 : dst (8 blocks)
+ * r2 : src (8 blocks)
+ * r3 : offset
+ * sp+0: checksum
+ * sp+4: L pointers (void *L[8])
+ */
+
+ push {r4-r11, ip, lr};
+ add ip, sp, #(10*4);
+
+ vpush {RA4-RB2};
+
+ ldm ip, {r4, lr};
+
+ vld1.8 {RT0}, [r3];
+
+ /* Load L pointers */
+ ldm lr!, {r5, r6, r7, r8};
+ ldm lr, {r9, r10, r11, ip};
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+
+ vld1.8 {RA0, RA1}, [r2]!;
+ vld1.8 {RA2, RA3}, [r2]!;
+ vld1.8 {RB0, RB1}, [r2]!;
+ vld1.8 {RB2, RB3}, [r2];
+
+#define OCB_INPUT(lreg, vreg) \
+ vld1.8 {RT3}, [lreg]; \
+ veor RT0, RT3; \
+ veor vreg, RT0; \
+ vst1.8 {RT0}, [r1]!;
+
+ OCB_INPUT(r5, RA0);
+ OCB_INPUT(r6, RA1);
+ OCB_INPUT(r7, RA2);
+ OCB_INPUT(r8, RA3);
+ OCB_INPUT(r9, RB0);
+ OCB_INPUT(r10, RB1);
+ OCB_INPUT(r11, RB2);
+ OCB_INPUT(ip, RB3);
+#undef OCB_INPUT
+
+ sub r1, r1, #(8*16);
+ vst1.8 {RT0}, [r3];
+ mov r2, r1;
+
+ bl __serpent_dec_blk8;
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ vld1.8 {RA4}, [r4];
+
+ vld1.8 {RT0, RT1}, [r1]!;
+ veor RA0, RA0, RT0;
+ veor RA1, RA1, RT1;
+ vld1.8 {RT2, RT3}, [r1]!;
+ veor RA4, RA4, RA0;
+ vst1.8 {RA0, RA1}, [r2]!;
+ veor RA4, RA4, RA1;
+ veor RA2, RA2, RT2;
+ veor RA3, RA3, RT3;
+ vld1.8 {RT0, RT1}, [r1]!;
+ veor RA4, RA4, RA2;
+ vst1.8 {RA2, RA3}, [r2]!;
+ veor RA4, RA4, RA3;
+ veor RB0, RB0, RT0;
+ veor RB1, RB1, RT1;
+ vld1.8 {RT2, RT3}, [r1]!;
+ veor RA4, RA4, RB0;
+ vst1.8 {RB0, RB1}, [r2]!;
+ veor RA4, RA4, RB1;
+ veor RB2, RB2, RT2;
+ veor RB3, RB3, RT3;
+ veor RA4, RA4, RB2;
+ vst1.8 {RB2, RB3}, [r2]!;
+
+ veor RA4, RA4, RB3;
+ vst1.8 {RA4}, [r4];
+
+ vpop {RA4-RB2};
+
+ /* clear the used registers */
+ veor RB4, RB4;
+
+ pop {r4-r11, ip, pc};
+.size _gcry_serpent_neon_ocb_dec,.-_gcry_serpent_neon_ocb_dec;
+
+.align 3
+.globl _gcry_serpent_neon_ocb_auth
+.type _gcry_serpent_neon_ocb_auth,%function;
+_gcry_serpent_neon_ocb_auth:
+ /* input:
+ * r0 : ctx, CTX
+ * r1 : abuf (8 blocks)
+ * r2 : offset
+ * r3 : checksum
+ * sp+0: L pointers (void *L[8])
+ */
+
+ push {r5-r11, ip, lr};
+ ldr lr, [sp, #(9*4)];
+
+ vpush {RA4-RB2};
+
+ vld1.8 {RT0}, [r2];
+
+ /* Load L pointers */
+ ldm lr!, {r5, r6, r7, r8};
+ ldm lr, {r9, r10, r11, ip};
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+
+ vld1.8 {RA0, RA1}, [r1]!;
+ vld1.8 {RA2, RA3}, [r1]!;
+ vld1.8 {RB0, RB1}, [r1]!;
+ vld1.8 {RB2, RB3}, [r1];
+
+#define OCB_INPUT(lreg, vreg) \
+ vld1.8 {RT3}, [lreg]; \
+ veor RT0, RT3; \
+ veor vreg, RT0;
+
+ OCB_INPUT(r5, RA0);
+ OCB_INPUT(r6, RA1);
+ OCB_INPUT(r7, RA2);
+ OCB_INPUT(r8, RA3);
+ OCB_INPUT(r9, RB0);
+ OCB_INPUT(r10, RB1);
+ OCB_INPUT(r11, RB2);
+ OCB_INPUT(ip, RB3);
+#undef OCB_INPUT
+
+ vst1.8 {RT0}, [r2];
+
+ bl __serpent_enc_blk8;
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ vld1.8 {RT0}, [r3];
+
+ veor RA4, RB4;
+ veor RA1, RB1;
+ veor RA2, RB2;
+ veor RA0, RB0;
+
+ veor RA2, RT0;
+ veor RA1, RA4;
+ veor RA0, RA2;
+
+ veor RA0, RA1;
+
+ vst1.8 {RA0}, [r3];
+
+ vpop {RA4-RB2};
+
+ /* clear the used registers */
+ veor RA3, RA3;
+ veor RB3, RB3;
+
+ pop {r5-r11, ip, pc};
+.size _gcry_serpent_neon_ocb_auth,.-_gcry_serpent_neon_ocb_auth;
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/serpent-avx2-amd64.S b/comm/third_party/libgcrypt/cipher/serpent-avx2-amd64.S
new file mode 100644
index 0000000000..dcee9b62a5
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/serpent-avx2-amd64.S
@@ -0,0 +1,1160 @@
+/* serpent-avx2-amd64.S - AVX2 implementation of Serpent cipher
+ *
+ * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#ifdef __x86_64
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) && \
+ defined(ENABLE_AVX2_SUPPORT)
+
+#include "asm-common-amd64.h"
+
+/* struct serpent_context: */
+#define ctx_keys 0
+
+/* register macros */
+#define CTX %rdi
+
+/* vector registers */
+#define RA0 %ymm0
+#define RA1 %ymm1
+#define RA2 %ymm2
+#define RA3 %ymm3
+#define RA4 %ymm4
+
+#define RB0 %ymm5
+#define RB1 %ymm6
+#define RB2 %ymm7
+#define RB3 %ymm8
+#define RB4 %ymm9
+
+#define RNOT %ymm10
+#define RTMP0 %ymm11
+#define RTMP1 %ymm12
+#define RTMP2 %ymm13
+#define RTMP3 %ymm14
+#define RTMP4 %ymm15
+
+#define RNOTx %xmm10
+#define RTMP0x %xmm11
+#define RTMP1x %xmm12
+#define RTMP2x %xmm13
+#define RTMP3x %xmm14
+#define RTMP4x %xmm15
+
+/**********************************************************************
+ helper macros
+ **********************************************************************/
+
+/* vector 32-bit rotation to left */
+#define vec_rol(reg, nleft, tmp) \
+ vpslld $(nleft), reg, tmp; \
+ vpsrld $(32 - (nleft)), reg, reg; \
+ vpor tmp, reg, reg;
+
+/* vector 32-bit rotation to right */
+#define vec_ror(reg, nright, tmp) \
+ vec_rol(reg, 32 - nright, tmp)
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+/**********************************************************************
+ 16-way serpent
+ **********************************************************************/
+
+/*
+ * These are the S-Boxes of Serpent from following research paper.
+ *
+ * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
+ * (New York, New York, USA), p. 317–329, National Institute of Standards and
+ * Technology, 2000.
+ *
+ * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
+ *
+ */
+#define SBOX0(r0, r1, r2, r3, r4) \
+ vpxor r0, r3, r3; vmovdqa r1, r4; \
+ vpand r3, r1, r1; vpxor r2, r4, r4; \
+ vpxor r0, r1, r1; vpor r3, r0, r0; \
+ vpxor r4, r0, r0; vpxor r3, r4, r4; \
+ vpxor r2, r3, r3; vpor r1, r2, r2; \
+ vpxor r4, r2, r2; vpxor RNOT, r4, r4; \
+ vpor r1, r4, r4; vpxor r3, r1, r1; \
+ vpxor r4, r1, r1; vpor r0, r3, r3; \
+ vpxor r3, r1, r1; vpxor r3, r4, r4;
+
+#define SBOX0_INVERSE(r0, r1, r2, r3, r4) \
+ vpxor RNOT, r2, r2; vmovdqa r1, r4; \
+ vpor r0, r1, r1; vpxor RNOT, r4, r4; \
+ vpxor r2, r1, r1; vpor r4, r2, r2; \
+ vpxor r3, r1, r1; vpxor r4, r0, r0; \
+ vpxor r0, r2, r2; vpand r3, r0, r0; \
+ vpxor r0, r4, r4; vpor r1, r0, r0; \
+ vpxor r2, r0, r0; vpxor r4, r3, r3; \
+ vpxor r1, r2, r2; vpxor r0, r3, r3; \
+ vpxor r1, r3, r3; \
+ vpand r3, r2, r2; \
+ vpxor r2, r4, r4;
+
+#define SBOX1(r0, r1, r2, r3, r4) \
+ vpxor RNOT, r0, r0; vpxor RNOT, r2, r2; \
+ vmovdqa r0, r4; vpand r1, r0, r0; \
+ vpxor r0, r2, r2; vpor r3, r0, r0; \
+ vpxor r2, r3, r3; vpxor r0, r1, r1; \
+ vpxor r4, r0, r0; vpor r1, r4, r4; \
+ vpxor r3, r1, r1; vpor r0, r2, r2; \
+ vpand r4, r2, r2; vpxor r1, r0, r0; \
+ vpand r2, r1, r1; \
+ vpxor r0, r1, r1; vpand r2, r0, r0; \
+ vpxor r4, r0, r0;
+
+#define SBOX1_INVERSE(r0, r1, r2, r3, r4) \
+ vmovdqa r1, r4; vpxor r3, r1, r1; \
+ vpand r1, r3, r3; vpxor r2, r4, r4; \
+ vpxor r0, r3, r3; vpor r1, r0, r0; \
+ vpxor r3, r2, r2; vpxor r4, r0, r0; \
+ vpor r2, r0, r0; vpxor r3, r1, r1; \
+ vpxor r1, r0, r0; vpor r3, r1, r1; \
+ vpxor r0, r1, r1; vpxor RNOT, r4, r4; \
+ vpxor r1, r4, r4; vpor r0, r1, r1; \
+ vpxor r0, r1, r1; \
+ vpor r4, r1, r1; \
+ vpxor r1, r3, r3;
+
+#define SBOX2(r0, r1, r2, r3, r4) \
+ vmovdqa r0, r4; vpand r2, r0, r0; \
+ vpxor r3, r0, r0; vpxor r1, r2, r2; \
+ vpxor r0, r2, r2; vpor r4, r3, r3; \
+ vpxor r1, r3, r3; vpxor r2, r4, r4; \
+ vmovdqa r3, r1; vpor r4, r3, r3; \
+ vpxor r0, r3, r3; vpand r1, r0, r0; \
+ vpxor r0, r4, r4; vpxor r3, r1, r1; \
+ vpxor r4, r1, r1; vpxor RNOT, r4, r4;
+
+#define SBOX2_INVERSE(r0, r1, r2, r3, r4) \
+ vpxor r3, r2, r2; vpxor r0, r3, r3; \
+ vmovdqa r3, r4; vpand r2, r3, r3; \
+ vpxor r1, r3, r3; vpor r2, r1, r1; \
+ vpxor r4, r1, r1; vpand r3, r4, r4; \
+ vpxor r3, r2, r2; vpand r0, r4, r4; \
+ vpxor r2, r4, r4; vpand r1, r2, r2; \
+ vpor r0, r2, r2; vpxor RNOT, r3, r3; \
+ vpxor r3, r2, r2; vpxor r3, r0, r0; \
+ vpand r1, r0, r0; vpxor r4, r3, r3; \
+ vpxor r0, r3, r3;
+
+#define SBOX3(r0, r1, r2, r3, r4) \
+ vmovdqa r0, r4; vpor r3, r0, r0; \
+ vpxor r1, r3, r3; vpand r4, r1, r1; \
+ vpxor r2, r4, r4; vpxor r3, r2, r2; \
+ vpand r0, r3, r3; vpor r1, r4, r4; \
+ vpxor r4, r3, r3; vpxor r1, r0, r0; \
+ vpand r0, r4, r4; vpxor r3, r1, r1; \
+ vpxor r2, r4, r4; vpor r0, r1, r1; \
+ vpxor r2, r1, r1; vpxor r3, r0, r0; \
+ vmovdqa r1, r2; vpor r3, r1, r1; \
+ vpxor r0, r1, r1;
+
+#define SBOX3_INVERSE(r0, r1, r2, r3, r4) \
+ vmovdqa r2, r4; vpxor r1, r2, r2; \
+ vpxor r2, r0, r0; vpand r2, r4, r4; \
+ vpxor r0, r4, r4; vpand r1, r0, r0; \
+ vpxor r3, r1, r1; vpor r4, r3, r3; \
+ vpxor r3, r2, r2; vpxor r3, r0, r0; \
+ vpxor r4, r1, r1; vpand r2, r3, r3; \
+ vpxor r1, r3, r3; vpxor r0, r1, r1; \
+ vpor r2, r1, r1; vpxor r3, r0, r0; \
+ vpxor r4, r1, r1; \
+ vpxor r1, r0, r0;
+
+#define SBOX4(r0, r1, r2, r3, r4) \
+ vpxor r3, r1, r1; vpxor RNOT, r3, r3; \
+ vpxor r3, r2, r2; vpxor r0, r3, r3; \
+ vmovdqa r1, r4; vpand r3, r1, r1; \
+ vpxor r2, r1, r1; vpxor r3, r4, r4; \
+ vpxor r4, r0, r0; vpand r4, r2, r2; \
+ vpxor r0, r2, r2; vpand r1, r0, r0; \
+ vpxor r0, r3, r3; vpor r1, r4, r4; \
+ vpxor r0, r4, r4; vpor r3, r0, r0; \
+ vpxor r2, r0, r0; vpand r3, r2, r2; \
+ vpxor RNOT, r0, r0; vpxor r2, r4, r4;
+
+#define SBOX4_INVERSE(r0, r1, r2, r3, r4) \
+ vmovdqa r2, r4; vpand r3, r2, r2; \
+ vpxor r1, r2, r2; vpor r3, r1, r1; \
+ vpand r0, r1, r1; vpxor r2, r4, r4; \
+ vpxor r1, r4, r4; vpand r2, r1, r1; \
+ vpxor RNOT, r0, r0; vpxor r4, r3, r3; \
+ vpxor r3, r1, r1; vpand r0, r3, r3; \
+ vpxor r2, r3, r3; vpxor r1, r0, r0; \
+ vpand r0, r2, r2; vpxor r0, r3, r3; \
+ vpxor r4, r2, r2; \
+ vpor r3, r2, r2; vpxor r0, r3, r3; \
+ vpxor r1, r2, r2;
+
+#define SBOX5(r0, r1, r2, r3, r4) \
+ vpxor r1, r0, r0; vpxor r3, r1, r1; \
+ vpxor RNOT, r3, r3; vmovdqa r1, r4; \
+ vpand r0, r1, r1; vpxor r3, r2, r2; \
+ vpxor r2, r1, r1; vpor r4, r2, r2; \
+ vpxor r3, r4, r4; vpand r1, r3, r3; \
+ vpxor r0, r3, r3; vpxor r1, r4, r4; \
+ vpxor r2, r4, r4; vpxor r0, r2, r2; \
+ vpand r3, r0, r0; vpxor RNOT, r2, r2; \
+ vpxor r4, r0, r0; vpor r3, r4, r4; \
+ vpxor r4, r2, r2;
+
+#define SBOX5_INVERSE(r0, r1, r2, r3, r4) \
+ vpxor RNOT, r1, r1; vmovdqa r3, r4; \
+ vpxor r1, r2, r2; vpor r0, r3, r3; \
+ vpxor r2, r3, r3; vpor r1, r2, r2; \
+ vpand r0, r2, r2; vpxor r3, r4, r4; \
+ vpxor r4, r2, r2; vpor r0, r4, r4; \
+ vpxor r1, r4, r4; vpand r2, r1, r1; \
+ vpxor r3, r1, r1; vpxor r2, r4, r4; \
+ vpand r4, r3, r3; vpxor r1, r4, r4; \
+ vpxor r4, r3, r3; vpxor RNOT, r4, r4; \
+ vpxor r0, r3, r3;
+
+#define SBOX6(r0, r1, r2, r3, r4) \
+ vpxor RNOT, r2, r2; vmovdqa r3, r4; \
+ vpand r0, r3, r3; vpxor r4, r0, r0; \
+ vpxor r2, r3, r3; vpor r4, r2, r2; \
+ vpxor r3, r1, r1; vpxor r0, r2, r2; \
+ vpor r1, r0, r0; vpxor r1, r2, r2; \
+ vpxor r0, r4, r4; vpor r3, r0, r0; \
+ vpxor r2, r0, r0; vpxor r3, r4, r4; \
+ vpxor r0, r4, r4; vpxor RNOT, r3, r3; \
+ vpand r4, r2, r2; \
+ vpxor r3, r2, r2;
+
+#define SBOX6_INVERSE(r0, r1, r2, r3, r4) \
+ vpxor r2, r0, r0; vmovdqa r2, r4; \
+ vpand r0, r2, r2; vpxor r3, r4, r4; \
+ vpxor RNOT, r2, r2; vpxor r1, r3, r3; \
+ vpxor r3, r2, r2; vpor r0, r4, r4; \
+ vpxor r2, r0, r0; vpxor r4, r3, r3; \
+ vpxor r1, r4, r4; vpand r3, r1, r1; \
+ vpxor r0, r1, r1; vpxor r3, r0, r0; \
+ vpor r2, r0, r0; vpxor r1, r3, r3; \
+ vpxor r0, r4, r4;
+
+#define SBOX7(r0, r1, r2, r3, r4) \
+ vmovdqa r1, r4; vpor r2, r1, r1; \
+ vpxor r3, r1, r1; vpxor r2, r4, r4; \
+ vpxor r1, r2, r2; vpor r4, r3, r3; \
+ vpand r0, r3, r3; vpxor r2, r4, r4; \
+ vpxor r1, r3, r3; vpor r4, r1, r1; \
+ vpxor r0, r1, r1; vpor r4, r0, r0; \
+ vpxor r2, r0, r0; vpxor r4, r1, r1; \
+ vpxor r1, r2, r2; vpand r0, r1, r1; \
+ vpxor r4, r1, r1; vpxor RNOT, r2, r2; \
+ vpor r0, r2, r2; \
+ vpxor r2, r4, r4;
+
+#define SBOX7_INVERSE(r0, r1, r2, r3, r4) \
+ vmovdqa r2, r4; vpxor r0, r2, r2; \
+ vpand r3, r0, r0; vpor r3, r4, r4; \
+ vpxor RNOT, r2, r2; vpxor r1, r3, r3; \
+ vpor r0, r1, r1; vpxor r2, r0, r0; \
+ vpand r4, r2, r2; vpand r4, r3, r3; \
+ vpxor r2, r1, r1; vpxor r0, r2, r2; \
+ vpor r2, r0, r0; vpxor r1, r4, r4; \
+ vpxor r3, r0, r0; vpxor r4, r3, r3; \
+ vpor r0, r4, r4; vpxor r2, r3, r3; \
+ vpxor r2, r4, r4;
+
+/* Apply SBOX number WHICH to the block. */
+#define SBOX(which, r0, r1, r2, r3, r4) \
+ SBOX##which (r0, r1, r2, r3, r4)
+
+/* Apply inverse SBOX number WHICH to the block. */
+#define SBOX_INVERSE(which, r0, r1, r2, r3, r4) \
+ SBOX##which##_INVERSE (r0, r1, r2, r3, r4)
+
+/* XOR round key into block state in r0,r1,r2,r3. r4 used as temporary. */
+#define BLOCK_XOR_KEY(r0, r1, r2, r3, r4, round) \
+ vpbroadcastd (ctx_keys + (round) * 16 + 0 * 4)(CTX), r4; \
+ vpxor r4, r0, r0; \
+ vpbroadcastd (ctx_keys + (round) * 16 + 1 * 4)(CTX), r4; \
+ vpxor r4, r1, r1; \
+ vpbroadcastd (ctx_keys + (round) * 16 + 2 * 4)(CTX), r4; \
+ vpxor r4, r2, r2; \
+ vpbroadcastd (ctx_keys + (round) * 16 + 3 * 4)(CTX), r4; \
+ vpxor r4, r3, r3;
+
+/* Apply the linear transformation to BLOCK. */
+#define LINEAR_TRANSFORMATION(r0, r1, r2, r3, r4) \
+ vec_rol(r0, 13, r4); \
+ vec_rol(r2, 3, r4); \
+ vpxor r0, r1, r1; \
+ vpxor r2, r1, r1; \
+ vpslld $3, r0, r4; \
+ vpxor r2, r3, r3; \
+ vpxor r4, r3, r3; \
+ vec_rol(r1, 1, r4); \
+ vec_rol(r3, 7, r4); \
+ vpxor r1, r0, r0; \
+ vpxor r3, r0, r0; \
+ vpslld $7, r1, r4; \
+ vpxor r3, r2, r2; \
+ vpxor r4, r2, r2; \
+ vec_rol(r0, 5, r4); \
+ vec_rol(r2, 22, r4);
+
+/* Apply the inverse linear transformation to BLOCK. */
+#define LINEAR_TRANSFORMATION_INVERSE(r0, r1, r2, r3, r4) \
+ vec_ror(r2, 22, r4); \
+ vec_ror(r0, 5, r4); \
+ vpslld $7, r1, r4; \
+ vpxor r3, r2, r2; \
+ vpxor r4, r2, r2; \
+ vpxor r1, r0, r0; \
+ vpxor r3, r0, r0; \
+ vec_ror(r3, 7, r4); \
+ vec_ror(r1, 1, r4); \
+ vpslld $3, r0, r4; \
+ vpxor r2, r3, r3; \
+ vpxor r4, r3, r3; \
+ vpxor r0, r1, r1; \
+ vpxor r2, r1, r1; \
+ vec_ror(r2, 3, r4); \
+ vec_ror(r0, 13, r4);
+
+/* Apply a Serpent round to sixteen parallel blocks. The `round' argument
+   selects the round key; the macro does not modify it. */
+#define ROUND(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
+ b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
+ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, round); \
+ SBOX (which, a0, a1, a2, a3, a4); \
+ BLOCK_XOR_KEY (b0, b1, b2, b3, b4, round); \
+ SBOX (which, b0, b1, b2, b3, b4); \
+ LINEAR_TRANSFORMATION (na0, na1, na2, na3, na4); \
+ LINEAR_TRANSFORMATION (nb0, nb1, nb2, nb3, nb4);
+
+/* Apply the last Serpent round to sixteen parallel blocks. The `round'
+   argument selects the round keys; both `round' and `round' + 1 are used. */
+#define ROUND_LAST(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
+ b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
+ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, round); \
+ SBOX (which, a0, a1, a2, a3, a4); \
+ BLOCK_XOR_KEY (b0, b1, b2, b3, b4, round); \
+ SBOX (which, b0, b1, b2, b3, b4); \
+ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, ((round) + 1)); \
+ BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, ((round) + 1));
+
+/* Apply an inverse Serpent round to sixteen parallel blocks. The `round'
+   argument selects the round key; the macro does not modify it. */
+#define ROUND_INVERSE(round, which, a0, a1, a2, a3, a4, \
+ na0, na1, na2, na3, na4, \
+ b0, b1, b2, b3, b4, \
+ nb0, nb1, nb2, nb3, nb4) \
+ LINEAR_TRANSFORMATION_INVERSE (a0, a1, a2, a3, a4); \
+ LINEAR_TRANSFORMATION_INVERSE (b0, b1, b2, b3, b4); \
+ SBOX_INVERSE (which, a0, a1, a2, a3, a4); \
+ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, round); \
+ SBOX_INVERSE (which, b0, b1, b2, b3, b4); \
+ BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round);
+
+/* Apply the first inverse Serpent round to sixteen parallel blocks. The
+   `round' argument selects the round keys for `round' + 1 and `round'. */
+#define ROUND_FIRST_INVERSE(round, which, a0, a1, a2, a3, a4, \
+ na0, na1, na2, na3, na4, \
+ b0, b1, b2, b3, b4, \
+ nb0, nb1, nb2, nb3, nb4) \
+ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, ((round) + 1)); \
+ BLOCK_XOR_KEY (b0, b1, b2, b3, b4, ((round) + 1)); \
+ SBOX_INVERSE (which, a0, a1, a2, a3, a4); \
+ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, round); \
+ SBOX_INVERSE (which, b0, b1, b2, b3, b4); \
+ BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round);
+
+.text
+
+.align 8
+ELF(.type __serpent_enc_blk16,@function;)
+__serpent_enc_blk16:
+ /* input:
+ * %rdi: ctx, CTX
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+ * plaintext blocks
+ * output:
+ * RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: sixteen parallel
+ * ciphertext blocks
+ */
+ CFI_STARTPROC();
+
+ vpcmpeqd RNOT, RNOT, RNOT;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
+
+ ROUND (0, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
+ RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
+ ROUND (1, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
+ RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
+ ROUND (2, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
+ RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
+ ROUND (3, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
+ RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
+ ROUND (4, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
+ RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
+ ROUND (5, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
+ RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
+ ROUND (6, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
+ RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
+ ROUND (7, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
+ RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);
+ ROUND (8, 0, RA4, RA1, RA2, RA0, RA3, RA1, RA3, RA2, RA4, RA0,
+ RB4, RB1, RB2, RB0, RB3, RB1, RB3, RB2, RB4, RB0);
+ ROUND (9, 1, RA1, RA3, RA2, RA4, RA0, RA2, RA1, RA4, RA3, RA0,
+ RB1, RB3, RB2, RB4, RB0, RB2, RB1, RB4, RB3, RB0);
+ ROUND (10, 2, RA2, RA1, RA4, RA3, RA0, RA4, RA3, RA1, RA0, RA2,
+ RB2, RB1, RB4, RB3, RB0, RB4, RB3, RB1, RB0, RB2);
+ ROUND (11, 3, RA4, RA3, RA1, RA0, RA2, RA3, RA1, RA0, RA2, RA4,
+ RB4, RB3, RB1, RB0, RB2, RB3, RB1, RB0, RB2, RB4);
+ ROUND (12, 4, RA3, RA1, RA0, RA2, RA4, RA1, RA4, RA3, RA2, RA0,
+ RB3, RB1, RB0, RB2, RB4, RB1, RB4, RB3, RB2, RB0);
+ ROUND (13, 5, RA1, RA4, RA3, RA2, RA0, RA4, RA2, RA1, RA3, RA0,
+ RB1, RB4, RB3, RB2, RB0, RB4, RB2, RB1, RB3, RB0);
+ ROUND (14, 6, RA4, RA2, RA1, RA3, RA0, RA4, RA2, RA0, RA1, RA3,
+ RB4, RB2, RB1, RB3, RB0, RB4, RB2, RB0, RB1, RB3);
+ ROUND (15, 7, RA4, RA2, RA0, RA1, RA3, RA3, RA1, RA2, RA4, RA0,
+ RB4, RB2, RB0, RB1, RB3, RB3, RB1, RB2, RB4, RB0);
+ ROUND (16, 0, RA3, RA1, RA2, RA4, RA0, RA1, RA0, RA2, RA3, RA4,
+ RB3, RB1, RB2, RB4, RB0, RB1, RB0, RB2, RB3, RB4);
+ ROUND (17, 1, RA1, RA0, RA2, RA3, RA4, RA2, RA1, RA3, RA0, RA4,
+ RB1, RB0, RB2, RB3, RB4, RB2, RB1, RB3, RB0, RB4);
+ ROUND (18, 2, RA2, RA1, RA3, RA0, RA4, RA3, RA0, RA1, RA4, RA2,
+ RB2, RB1, RB3, RB0, RB4, RB3, RB0, RB1, RB4, RB2);
+ ROUND (19, 3, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA4, RA2, RA3,
+ RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB4, RB2, RB3);
+ ROUND (20, 4, RA0, RA1, RA4, RA2, RA3, RA1, RA3, RA0, RA2, RA4,
+ RB0, RB1, RB4, RB2, RB3, RB1, RB3, RB0, RB2, RB4);
+ ROUND (21, 5, RA1, RA3, RA0, RA2, RA4, RA3, RA2, RA1, RA0, RA4,
+ RB1, RB3, RB0, RB2, RB4, RB3, RB2, RB1, RB0, RB4);
+ ROUND (22, 6, RA3, RA2, RA1, RA0, RA4, RA3, RA2, RA4, RA1, RA0,
+ RB3, RB2, RB1, RB0, RB4, RB3, RB2, RB4, RB1, RB0);
+ ROUND (23, 7, RA3, RA2, RA4, RA1, RA0, RA0, RA1, RA2, RA3, RA4,
+ RB3, RB2, RB4, RB1, RB0, RB0, RB1, RB2, RB3, RB4);
+ ROUND (24, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
+ RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
+ ROUND (25, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
+ RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
+ ROUND (26, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
+ RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
+ ROUND (27, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
+ RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
+ ROUND (28, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
+ RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
+ ROUND (29, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
+ RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
+ ROUND (30, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
+ RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
+ ROUND_LAST (31, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
+ RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);
+
+ transpose_4x4(RA4, RA1, RA2, RA0, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __serpent_enc_blk16,.-__serpent_enc_blk16;)
+
+.align 8
+ELF(.type __serpent_dec_blk16,@function;)
+__serpent_dec_blk16:
+ /* input:
+ * %rdi: ctx, CTX
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+ * ciphertext blocks
+ * output:
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+ * plaintext blocks
+ */
+ CFI_STARTPROC();
+
+ vpcmpeqd RNOT, RNOT, RNOT;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
+
+ ROUND_FIRST_INVERSE (31, 7, RA0, RA1, RA2, RA3, RA4,
+ RA3, RA0, RA1, RA4, RA2,
+ RB0, RB1, RB2, RB3, RB4,
+ RB3, RB0, RB1, RB4, RB2);
+ ROUND_INVERSE (30, 6, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA2, RA4, RA3,
+ RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB2, RB4, RB3);
+ ROUND_INVERSE (29, 5, RA0, RA1, RA2, RA4, RA3, RA1, RA3, RA4, RA2, RA0,
+ RB0, RB1, RB2, RB4, RB3, RB1, RB3, RB4, RB2, RB0);
+ ROUND_INVERSE (28, 4, RA1, RA3, RA4, RA2, RA0, RA1, RA2, RA4, RA0, RA3,
+ RB1, RB3, RB4, RB2, RB0, RB1, RB2, RB4, RB0, RB3);
+ ROUND_INVERSE (27, 3, RA1, RA2, RA4, RA0, RA3, RA4, RA2, RA0, RA1, RA3,
+ RB1, RB2, RB4, RB0, RB3, RB4, RB2, RB0, RB1, RB3);
+ ROUND_INVERSE (26, 2, RA4, RA2, RA0, RA1, RA3, RA2, RA3, RA0, RA1, RA4,
+ RB4, RB2, RB0, RB1, RB3, RB2, RB3, RB0, RB1, RB4);
+ ROUND_INVERSE (25, 1, RA2, RA3, RA0, RA1, RA4, RA4, RA2, RA1, RA0, RA3,
+ RB2, RB3, RB0, RB1, RB4, RB4, RB2, RB1, RB0, RB3);
+ ROUND_INVERSE (24, 0, RA4, RA2, RA1, RA0, RA3, RA4, RA3, RA2, RA0, RA1,
+ RB4, RB2, RB1, RB0, RB3, RB4, RB3, RB2, RB0, RB1);
+ ROUND_INVERSE (23, 7, RA4, RA3, RA2, RA0, RA1, RA0, RA4, RA3, RA1, RA2,
+ RB4, RB3, RB2, RB0, RB1, RB0, RB4, RB3, RB1, RB2);
+ ROUND_INVERSE (22, 6, RA0, RA4, RA3, RA1, RA2, RA4, RA3, RA2, RA1, RA0,
+ RB0, RB4, RB3, RB1, RB2, RB4, RB3, RB2, RB1, RB0);
+ ROUND_INVERSE (21, 5, RA4, RA3, RA2, RA1, RA0, RA3, RA0, RA1, RA2, RA4,
+ RB4, RB3, RB2, RB1, RB0, RB3, RB0, RB1, RB2, RB4);
+ ROUND_INVERSE (20, 4, RA3, RA0, RA1, RA2, RA4, RA3, RA2, RA1, RA4, RA0,
+ RB3, RB0, RB1, RB2, RB4, RB3, RB2, RB1, RB4, RB0);
+ ROUND_INVERSE (19, 3, RA3, RA2, RA1, RA4, RA0, RA1, RA2, RA4, RA3, RA0,
+ RB3, RB2, RB1, RB4, RB0, RB1, RB2, RB4, RB3, RB0);
+ ROUND_INVERSE (18, 2, RA1, RA2, RA4, RA3, RA0, RA2, RA0, RA4, RA3, RA1,
+ RB1, RB2, RB4, RB3, RB0, RB2, RB0, RB4, RB3, RB1);
+ ROUND_INVERSE (17, 1, RA2, RA0, RA4, RA3, RA1, RA1, RA2, RA3, RA4, RA0,
+ RB2, RB0, RB4, RB3, RB1, RB1, RB2, RB3, RB4, RB0);
+ ROUND_INVERSE (16, 0, RA1, RA2, RA3, RA4, RA0, RA1, RA0, RA2, RA4, RA3,
+ RB1, RB2, RB3, RB4, RB0, RB1, RB0, RB2, RB4, RB3);
+ ROUND_INVERSE (15, 7, RA1, RA0, RA2, RA4, RA3, RA4, RA1, RA0, RA3, RA2,
+ RB1, RB0, RB2, RB4, RB3, RB4, RB1, RB0, RB3, RB2);
+ ROUND_INVERSE (14, 6, RA4, RA1, RA0, RA3, RA2, RA1, RA0, RA2, RA3, RA4,
+ RB4, RB1, RB0, RB3, RB2, RB1, RB0, RB2, RB3, RB4);
+ ROUND_INVERSE (13, 5, RA1, RA0, RA2, RA3, RA4, RA0, RA4, RA3, RA2, RA1,
+ RB1, RB0, RB2, RB3, RB4, RB0, RB4, RB3, RB2, RB1);
+ ROUND_INVERSE (12, 4, RA0, RA4, RA3, RA2, RA1, RA0, RA2, RA3, RA1, RA4,
+ RB0, RB4, RB3, RB2, RB1, RB0, RB2, RB3, RB1, RB4);
+ ROUND_INVERSE (11, 3, RA0, RA2, RA3, RA1, RA4, RA3, RA2, RA1, RA0, RA4,
+ RB0, RB2, RB3, RB1, RB4, RB3, RB2, RB1, RB0, RB4);
+ ROUND_INVERSE (10, 2, RA3, RA2, RA1, RA0, RA4, RA2, RA4, RA1, RA0, RA3,
+ RB3, RB2, RB1, RB0, RB4, RB2, RB4, RB1, RB0, RB3);
+ ROUND_INVERSE (9, 1, RA2, RA4, RA1, RA0, RA3, RA3, RA2, RA0, RA1, RA4,
+ RB2, RB4, RB1, RB0, RB3, RB3, RB2, RB0, RB1, RB4);
+ ROUND_INVERSE (8, 0, RA3, RA2, RA0, RA1, RA4, RA3, RA4, RA2, RA1, RA0,
+ RB3, RB2, RB0, RB1, RB4, RB3, RB4, RB2, RB1, RB0);
+ ROUND_INVERSE (7, 7, RA3, RA4, RA2, RA1, RA0, RA1, RA3, RA4, RA0, RA2,
+ RB3, RB4, RB2, RB1, RB0, RB1, RB3, RB4, RB0, RB2);
+ ROUND_INVERSE (6, 6, RA1, RA3, RA4, RA0, RA2, RA3, RA4, RA2, RA0, RA1,
+ RB1, RB3, RB4, RB0, RB2, RB3, RB4, RB2, RB0, RB1);
+ ROUND_INVERSE (5, 5, RA3, RA4, RA2, RA0, RA1, RA4, RA1, RA0, RA2, RA3,
+ RB3, RB4, RB2, RB0, RB1, RB4, RB1, RB0, RB2, RB3);
+ ROUND_INVERSE (4, 4, RA4, RA1, RA0, RA2, RA3, RA4, RA2, RA0, RA3, RA1,
+ RB4, RB1, RB0, RB2, RB3, RB4, RB2, RB0, RB3, RB1);
+ ROUND_INVERSE (3, 3, RA4, RA2, RA0, RA3, RA1, RA0, RA2, RA3, RA4, RA1,
+ RB4, RB2, RB0, RB3, RB1, RB0, RB2, RB3, RB4, RB1);
+ ROUND_INVERSE (2, 2, RA0, RA2, RA3, RA4, RA1, RA2, RA1, RA3, RA4, RA0,
+ RB0, RB2, RB3, RB4, RB1, RB2, RB1, RB3, RB4, RB0);
+ ROUND_INVERSE (1, 1, RA2, RA1, RA3, RA4, RA0, RA0, RA2, RA4, RA3, RA1,
+ RB2, RB1, RB3, RB4, RB0, RB0, RB2, RB4, RB3, RB1);
+ ROUND_INVERSE (0, 0, RA0, RA2, RA4, RA3, RA1, RA0, RA1, RA2, RA3, RA4,
+ RB0, RB2, RB4, RB3, RB1, RB0, RB1, RB2, RB3, RB4);
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __serpent_dec_blk16,.-__serpent_dec_blk16;)
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
+
+.align 8
+.globl _gcry_serpent_avx2_ctr_enc
+ELF(.type _gcry_serpent_avx2_ctr_enc,@function;)
+_gcry_serpent_avx2_ctr_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ CFI_STARTPROC();
+
+ movq 8(%rcx), %rax;
+ bswapq %rax;
+
+ vzeroupper;
+
+ vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
+ vpcmpeqd RNOT, RNOT, RNOT;
+ vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */
+ vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
+
+ /* load IV and byteswap */
+ vmovdqu (%rcx), RTMP4x;
+ vpshufb RTMP3x, RTMP4x, RTMP4x;
+ vmovdqa RTMP4x, RTMP0x;
+ inc_le128(RTMP4x, RNOTx, RTMP1x);
+ vinserti128 $1, RTMP4x, RTMP0, RTMP0;
+ vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
+
+ /* check need for handling 64-bit overflow and carry */
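+	/* (%rax holds the low 64 bits of the big-endian counter; if they cannot
+	 * wrap within the next 16 blocks, the fast path below builds the IVs
+	 * with packed 64-bit arithmetic on the low halves only.) */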
+ cmpq $(0xffffffffffffffff - 16), %rax;
+ ja .Lhandle_ctr_carry;
+
+ /* construct IVs */
+ vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
+ vpshufb RTMP3, RTMP0, RA1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
+ vpshufb RTMP3, RTMP0, RA2;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
+ vpshufb RTMP3, RTMP0, RA3;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
+ vpshufb RTMP3, RTMP0, RB0;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
+ vpshufb RTMP3, RTMP0, RB1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
+ vpshufb RTMP3, RTMP0, RB2;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
+ vpshufb RTMP3, RTMP0, RB3;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
+ vpshufb RTMP3x, RTMP0x, RTMP0x;
+
+ jmp .Lctr_carry_done;
+
+.Lhandle_ctr_carry:
+ /* construct IVs */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vextracti128 $1, RTMP0, RTMP0x;
+ vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */
+
+.align 4
+.Lctr_carry_done:
+ /* store new IV */
+ vmovdqu RTMP0x, (%rcx);
+
+ call __serpent_enc_blk16;
+
+ vpxor (0 * 32)(%rdx), RA4, RA4;
+ vpxor (1 * 32)(%rdx), RA1, RA1;
+ vpxor (2 * 32)(%rdx), RA2, RA2;
+ vpxor (3 * 32)(%rdx), RA0, RA0;
+ vpxor (4 * 32)(%rdx), RB4, RB4;
+ vpxor (5 * 32)(%rdx), RB1, RB1;
+ vpxor (6 * 32)(%rdx), RB2, RB2;
+ vpxor (7 * 32)(%rdx), RB0, RB0;
+
+ vmovdqu RA4, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA0, (3 * 32)(%rsi);
+ vmovdqu RB4, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB0, (7 * 32)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;)
+
+.align 8
+.globl _gcry_serpent_avx2_cbc_dec
+ELF(.type _gcry_serpent_avx2_cbc_dec,@function;)
+_gcry_serpent_avx2_cbc_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ vmovdqu (0 * 32)(%rdx), RA0;
+ vmovdqu (1 * 32)(%rdx), RA1;
+ vmovdqu (2 * 32)(%rdx), RA2;
+ vmovdqu (3 * 32)(%rdx), RA3;
+ vmovdqu (4 * 32)(%rdx), RB0;
+ vmovdqu (5 * 32)(%rdx), RB1;
+ vmovdqu (6 * 32)(%rdx), RB2;
+ vmovdqu (7 * 32)(%rdx), RB3;
+
+ call __serpent_dec_blk16;
+
+ vmovdqu (%rcx), RNOTx;
+ vinserti128 $1, (%rdx), RNOT, RNOT;
+ vpxor RNOT, RA0, RA0;
+ vpxor (0 * 32 + 16)(%rdx), RA1, RA1;
+ vpxor (1 * 32 + 16)(%rdx), RA2, RA2;
+ vpxor (2 * 32 + 16)(%rdx), RA3, RA3;
+ vpxor (3 * 32 + 16)(%rdx), RB0, RB0;
+ vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
+ vpxor (5 * 32 + 16)(%rdx), RB2, RB2;
+ vpxor (6 * 32 + 16)(%rdx), RB3, RB3;
+ vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+ vmovdqu RNOTx, (%rcx); /* store new IV */
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;)
+
+.align 8
+.globl _gcry_serpent_avx2_cfb_dec
+ELF(.type _gcry_serpent_avx2_cfb_dec,@function;)
+_gcry_serpent_avx2_cfb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ /* Load input */
+ vmovdqu (%rcx), RNOTx;
+ vinserti128 $1, (%rdx), RNOT, RA0;
+ vmovdqu (0 * 32 + 16)(%rdx), RA1;
+ vmovdqu (1 * 32 + 16)(%rdx), RA2;
+ vmovdqu (2 * 32 + 16)(%rdx), RA3;
+ vmovdqu (3 * 32 + 16)(%rdx), RB0;
+ vmovdqu (4 * 32 + 16)(%rdx), RB1;
+ vmovdqu (5 * 32 + 16)(%rdx), RB2;
+ vmovdqu (6 * 32 + 16)(%rdx), RB3;
+
+ /* Update IV */
+ vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+ vmovdqu RNOTx, (%rcx);
+
+ call __serpent_enc_blk16;
+
+ vpxor (0 * 32)(%rdx), RA4, RA4;
+ vpxor (1 * 32)(%rdx), RA1, RA1;
+ vpxor (2 * 32)(%rdx), RA2, RA2;
+ vpxor (3 * 32)(%rdx), RA0, RA0;
+ vpxor (4 * 32)(%rdx), RB4, RB4;
+ vpxor (5 * 32)(%rdx), RB1, RB1;
+ vpxor (6 * 32)(%rdx), RB2, RB2;
+ vpxor (7 * 32)(%rdx), RB0, RB0;
+
+ vmovdqu RA4, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA0, (3 * 32)(%rsi);
+ vmovdqu RB4, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB0, (7 * 32)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;)
+
+.align 8
+.globl _gcry_serpent_avx2_ocb_enc
+ELF(.type _gcry_serpent_avx2_ocb_enc,@function;)
+
+_gcry_serpent_avx2_ocb_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[16])
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ vmovdqu (%rcx), RTMP0x;
+ vmovdqu (%r8), RTMP1x;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+
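+/* Each OCB_INPUT call processes two blocks: it chains the two new offsets
+ * through RTMP0x, accumulates the plaintext into the checksum in RTMP1,
+ * whitens the plaintext in yreg with the offsets, and parks the offsets in
+ * the dst buffer so they can be XORed back in after __serpent_enc_blk16. */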
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+ vmovdqu (n * 32)(%rdx), yreg; \
+ vpxor (l0reg), RTMP0x, RNOTx; \
+ vpxor (l1reg), RNOTx, RTMP0x; \
+ vinserti128 $1, RTMP0x, RNOT, RNOT; \
+ vpxor yreg, RTMP1, RTMP1; \
+ vpxor yreg, RNOT, yreg; \
+ vmovdqu RNOT, (n * 32)(%rsi);
+
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, %r11, RA0);
+ OCB_INPUT(1, %r12, %r13, RA1);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(2, %r10, %r11, RA2);
+ OCB_INPUT(3, %r12, %r13, RA3);
+ movq (8 * 8)(%r9), %r10;
+ movq (9 * 8)(%r9), %r11;
+ movq (10 * 8)(%r9), %r12;
+ movq (11 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, %r11, RB0);
+ OCB_INPUT(5, %r12, %r13, RB1);
+ movq (12 * 8)(%r9), %r10;
+ movq (13 * 8)(%r9), %r11;
+ movq (14 * 8)(%r9), %r12;
+ movq (15 * 8)(%r9), %r13;
+ OCB_INPUT(6, %r10, %r11, RB2);
+ OCB_INPUT(7, %r12, %r13, RB3);
+#undef OCB_INPUT
+
+ vextracti128 $1, RTMP1, RNOTx;
+ vmovdqu RTMP0x, (%rcx);
+ vpxor RNOTx, RTMP1x, RTMP1x;
+ vmovdqu RTMP1x, (%r8);
+
+ movq (0 * 8)(%rsp), %r10;
+ movq (1 * 8)(%rsp), %r11;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ call __serpent_enc_blk16;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ vpxor (0 * 32)(%rsi), RA4, RA4;
+ vpxor (1 * 32)(%rsi), RA1, RA1;
+ vpxor (2 * 32)(%rsi), RA2, RA2;
+ vpxor (3 * 32)(%rsi), RA0, RA0;
+ vpxor (4 * 32)(%rsi), RB4, RB4;
+ vpxor (5 * 32)(%rsi), RB1, RB1;
+ vpxor (6 * 32)(%rsi), RB2, RB2;
+ vpxor (7 * 32)(%rsi), RB0, RB0;
+
+ vmovdqu RA4, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA0, (3 * 32)(%rsi);
+ vmovdqu RB4, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB0, (7 * 32)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_serpent_avx2_ocb_enc,.-_gcry_serpent_avx2_ocb_enc;)
+
+.align 8
+.globl _gcry_serpent_avx2_ocb_dec
+ELF(.type _gcry_serpent_avx2_ocb_dec,@function;)
+
+_gcry_serpent_avx2_ocb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[16])
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ vmovdqu (%rcx), RTMP0x;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+ vmovdqu (n * 32)(%rdx), yreg; \
+ vpxor (l0reg), RTMP0x, RNOTx; \
+ vpxor (l1reg), RNOTx, RTMP0x; \
+ vinserti128 $1, RTMP0x, RNOT, RNOT; \
+ vpxor yreg, RNOT, yreg; \
+ vmovdqu RNOT, (n * 32)(%rsi);
+
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, %r11, RA0);
+ OCB_INPUT(1, %r12, %r13, RA1);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(2, %r10, %r11, RA2);
+ OCB_INPUT(3, %r12, %r13, RA3);
+ movq (8 * 8)(%r9), %r10;
+ movq (9 * 8)(%r9), %r11;
+ movq (10 * 8)(%r9), %r12;
+ movq (11 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, %r11, RB0);
+ OCB_INPUT(5, %r12, %r13, RB1);
+ movq (12 * 8)(%r9), %r10;
+ movq (13 * 8)(%r9), %r11;
+ movq (14 * 8)(%r9), %r12;
+ movq (15 * 8)(%r9), %r13;
+ OCB_INPUT(6, %r10, %r11, RB2);
+ OCB_INPUT(7, %r12, %r13, RB3);
+#undef OCB_INPUT
+
+ vmovdqu RTMP0x, (%rcx);
+
+ movq (0 * 8)(%rsp), %r10;
+ movq (1 * 8)(%rsp), %r11;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ call __serpent_dec_blk16;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ vmovdqu (%r8), RTMP1x;
+
+ vpxor (0 * 32)(%rsi), RA0, RA0;
+ vpxor (1 * 32)(%rsi), RA1, RA1;
+ vpxor (2 * 32)(%rsi), RA2, RA2;
+ vpxor (3 * 32)(%rsi), RA3, RA3;
+ vpxor (4 * 32)(%rsi), RB0, RB0;
+ vpxor (5 * 32)(%rsi), RB1, RB1;
+ vpxor (6 * 32)(%rsi), RB2, RB2;
+ vpxor (7 * 32)(%rsi), RB3, RB3;
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vpxor RA0, RTMP1, RTMP1;
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vpxor RA1, RTMP1, RTMP1;
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vpxor RA2, RTMP1, RTMP1;
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vpxor RA3, RTMP1, RTMP1;
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vpxor RB0, RTMP1, RTMP1;
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vpxor RB1, RTMP1, RTMP1;
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vpxor RB2, RTMP1, RTMP1;
+ vmovdqu RB3, (7 * 32)(%rsi);
+ vpxor RB3, RTMP1, RTMP1;
+
+ vextracti128 $1, RTMP1, RNOTx;
+ vpxor RNOTx, RTMP1x, RTMP1x;
+ vmovdqu RTMP1x, (%r8);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_serpent_avx2_ocb_dec,.-_gcry_serpent_avx2_ocb_dec;)
+
+.align 8
+.globl _gcry_serpent_avx2_ocb_auth
+ELF(.type _gcry_serpent_avx2_ocb_auth,@function;)
+
+_gcry_serpent_avx2_ocb_auth:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: abuf (16 blocks)
+ * %rdx: offset
+ * %rcx: checksum
+ * %r8 : L pointers (void *L[16])
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ vmovdqu (%rdx), RTMP0x;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+ vmovdqu (n * 32)(%rsi), yreg; \
+ vpxor (l0reg), RTMP0x, RNOTx; \
+ vpxor (l1reg), RNOTx, RTMP0x; \
+ vinserti128 $1, RTMP0x, RNOT, RNOT; \
+ vpxor yreg, RNOT, yreg;
+
+ movq (0 * 8)(%r8), %r10;
+ movq (1 * 8)(%r8), %r11;
+ movq (2 * 8)(%r8), %r12;
+ movq (3 * 8)(%r8), %r13;
+ OCB_INPUT(0, %r10, %r11, RA0);
+ OCB_INPUT(1, %r12, %r13, RA1);
+ movq (4 * 8)(%r8), %r10;
+ movq (5 * 8)(%r8), %r11;
+ movq (6 * 8)(%r8), %r12;
+ movq (7 * 8)(%r8), %r13;
+ OCB_INPUT(2, %r10, %r11, RA2);
+ OCB_INPUT(3, %r12, %r13, RA3);
+ movq (8 * 8)(%r8), %r10;
+ movq (9 * 8)(%r8), %r11;
+ movq (10 * 8)(%r8), %r12;
+ movq (11 * 8)(%r8), %r13;
+ OCB_INPUT(4, %r10, %r11, RB0);
+ OCB_INPUT(5, %r12, %r13, RB1);
+ movq (12 * 8)(%r8), %r10;
+ movq (13 * 8)(%r8), %r11;
+ movq (14 * 8)(%r8), %r12;
+ movq (15 * 8)(%r8), %r13;
+ OCB_INPUT(6, %r10, %r11, RB2);
+ OCB_INPUT(7, %r12, %r13, RB3);
+#undef OCB_INPUT
+
+ vmovdqu RTMP0x, (%rdx);
+
+ movq (0 * 8)(%rsp), %r10;
+ movq (1 * 8)(%rsp), %r11;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ call __serpent_enc_blk16;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ vpxor RA4, RB4, RA4;
+ vpxor RA1, RB1, RA1;
+ vpxor RA2, RB2, RA2;
+ vpxor RA0, RB0, RA0;
+
+ vpxor RA4, RA1, RA1;
+ vpxor RA2, RA0, RA0;
+
+ vpxor RA1, RA0, RTMP1;
+
+ vextracti128 $1, RTMP1, RNOTx;
+ vpxor (%rcx), RTMP1x, RTMP1x;
+ vpxor RNOTx, RTMP1x, RTMP1x;
+ vmovdqu RTMP1x, (%rcx);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_serpent_avx2_ocb_auth,.-_gcry_serpent_avx2_ocb_auth;)
+
+.align 16
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+#endif /*defined(USE_SERPENT) && defined(ENABLE_AVX2_SUPPORT)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/serpent-sse2-amd64.S b/comm/third_party/libgcrypt/cipher/serpent-sse2-amd64.S
new file mode 100644
index 0000000000..39cba00297
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/serpent-sse2-amd64.S
@@ -0,0 +1,1211 @@
+/* serpent-sse2-amd64.S - SSE2 implementation of Serpent cipher
+ *
+ * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT)
+
+#include "asm-common-amd64.h"
+
+/* struct serpent_context: */
+#define ctx_keys 0
+
+/* register macros */
+#define CTX %rdi
+
+/* vector registers */
+#define RA0 %xmm0
+#define RA1 %xmm1
+#define RA2 %xmm2
+#define RA3 %xmm3
+#define RA4 %xmm4
+
+#define RB0 %xmm5
+#define RB1 %xmm6
+#define RB2 %xmm7
+#define RB3 %xmm8
+#define RB4 %xmm9
+
+#define RNOT %xmm10
+#define RTMP0 %xmm11
+#define RTMP1 %xmm12
+#define RTMP2 %xmm13
+
+/**********************************************************************
+ helper macros
+ **********************************************************************/
+
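+/* SSE2 has no vector rotate instruction, so the rotations below are built
+   from a left shift, a right shift by the complementary count and an OR. */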
+/* vector 32-bit rotation to left */
+#define vec_rol(reg, nleft, tmp) \
+ movdqa reg, tmp; \
+ pslld $(nleft), tmp; \
+ psrld $(32 - (nleft)), reg; \
+ por tmp, reg;
+
+/* vector 32-bit rotation to right */
+#define vec_ror(reg, nright, tmp) \
+ vec_rol(reg, 32 - nright, tmp)
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
+ movdqa x0, t2; \
+ punpckhdq x1, t2; \
+ punpckldq x1, x0; \
+ \
+ movdqa x2, t1; \
+ punpckldq x3, t1; \
+ punpckhdq x3, x2; \
+ \
+ movdqa x0, x1; \
+ punpckhqdq t1, x1; \
+ punpcklqdq t1, x0; \
+ \
+ movdqa t2, x3; \
+ punpckhqdq x2, x3; \
+ punpcklqdq x2, t2; \
+ movdqa t2, x2;
+
+/* fill xmm register with 32-bit value from memory */
+#define pbroadcastd(mem32, xreg) \
+ movd mem32, xreg; \
+ pshufd $0, xreg, xreg;
+
+/* xor with unaligned memory operand */
+#define pxor_u(umem128, xreg, t) \
+ movdqu umem128, t; \
+ pxor t, xreg;
+
+/* 128-bit wide byte swap */
+#define pbswap(xreg, t0) \
+ /* reorder 32-bit words, [a,b,c,d] => [d,c,b,a] */ \
+ pshufd $0x1b, xreg, xreg; \
+ /* reorder high&low 16-bit words, [d0,d1,c0,c1] => [d1,d0,c1,c0] */ \
+ pshuflw $0xb1, xreg, xreg; \
+ pshufhw $0xb1, xreg, xreg; \
+ /* reorder bytes in 16-bit words */ \
+ movdqa xreg, t0; \
+ psrlw $8, t0; \
+ psllw $8, xreg; \
+ por t0, xreg;
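+/* (A single pshufb could do this byte swap, but that instruction requires
+   SSSE3 while this implementation is restricted to plain SSE2.) */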
+
+/**********************************************************************
+ 8-way serpent
+ **********************************************************************/
+
+/*
+ * These are the S-Boxes of Serpent from the following research paper.
+ *
+ * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
+ * (New York, New York, USA), p. 317–329, National Institute of Standards and
+ * Technology, 2000.
+ *
+ * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
+ *
+ */
+#define SBOX0(r0, r1, r2, r3, r4) \
+ pxor r0, r3; movdqa r1, r4; \
+ pand r3, r1; pxor r2, r4; \
+ pxor r0, r1; por r3, r0; \
+ pxor r4, r0; pxor r3, r4; \
+ pxor r2, r3; por r1, r2; \
+ pxor r4, r2; pxor RNOT, r4; \
+ por r1, r4; pxor r3, r1; \
+ pxor r4, r1; por r0, r3; \
+ pxor r3, r1; pxor r3, r4;
+
+#define SBOX0_INVERSE(r0, r1, r2, r3, r4) \
+ pxor RNOT, r2; movdqa r1, r4; \
+ por r0, r1; pxor RNOT, r4; \
+ pxor r2, r1; por r4, r2; \
+ pxor r3, r1; pxor r4, r0; \
+ pxor r0, r2; pand r3, r0; \
+ pxor r0, r4; por r1, r0; \
+ pxor r2, r0; pxor r4, r3; \
+ pxor r1, r2; pxor r0, r3; \
+ pxor r1, r3; \
+ pand r3, r2; \
+ pxor r2, r4;
+
+#define SBOX1(r0, r1, r2, r3, r4) \
+ pxor RNOT, r0; pxor RNOT, r2; \
+ movdqa r0, r4; pand r1, r0; \
+ pxor r0, r2; por r3, r0; \
+ pxor r2, r3; pxor r0, r1; \
+ pxor r4, r0; por r1, r4; \
+ pxor r3, r1; por r0, r2; \
+ pand r4, r2; pxor r1, r0; \
+ pand r2, r1; \
+ pxor r0, r1; pand r2, r0; \
+ pxor r4, r0;
+
+#define SBOX1_INVERSE(r0, r1, r2, r3, r4) \
+ movdqa r1, r4; pxor r3, r1; \
+ pand r1, r3; pxor r2, r4; \
+ pxor r0, r3; por r1, r0; \
+ pxor r3, r2; pxor r4, r0; \
+ por r2, r0; pxor r3, r1; \
+ pxor r1, r0; por r3, r1; \
+ pxor r0, r1; pxor RNOT, r4; \
+ pxor r1, r4; por r0, r1; \
+ pxor r0, r1; \
+ por r4, r1; \
+ pxor r1, r3;
+
+#define SBOX2(r0, r1, r2, r3, r4) \
+ movdqa r0, r4; pand r2, r0; \
+ pxor r3, r0; pxor r1, r2; \
+ pxor r0, r2; por r4, r3; \
+ pxor r1, r3; pxor r2, r4; \
+ movdqa r3, r1; por r4, r3; \
+ pxor r0, r3; pand r1, r0; \
+ pxor r0, r4; pxor r3, r1; \
+ pxor r4, r1; pxor RNOT, r4;
+
+#define SBOX2_INVERSE(r0, r1, r2, r3, r4) \
+ pxor r3, r2; pxor r0, r3; \
+ movdqa r3, r4; pand r2, r3; \
+ pxor r1, r3; por r2, r1; \
+ pxor r4, r1; pand r3, r4; \
+ pxor r3, r2; pand r0, r4; \
+ pxor r2, r4; pand r1, r2; \
+ por r0, r2; pxor RNOT, r3; \
+ pxor r3, r2; pxor r3, r0; \
+ pand r1, r0; pxor r4, r3; \
+ pxor r0, r3;
+
+#define SBOX3(r0, r1, r2, r3, r4) \
+ movdqa r0, r4; por r3, r0; \
+ pxor r1, r3; pand r4, r1; \
+ pxor r2, r4; pxor r3, r2; \
+ pand r0, r3; por r1, r4; \
+ pxor r4, r3; pxor r1, r0; \
+ pand r0, r4; pxor r3, r1; \
+ pxor r2, r4; por r0, r1; \
+ pxor r2, r1; pxor r3, r0; \
+ movdqa r1, r2; por r3, r1; \
+ pxor r0, r1;
+
+#define SBOX3_INVERSE(r0, r1, r2, r3, r4) \
+ movdqa r2, r4; pxor r1, r2; \
+ pxor r2, r0; pand r2, r4; \
+ pxor r0, r4; pand r1, r0; \
+ pxor r3, r1; por r4, r3; \
+ pxor r3, r2; pxor r3, r0; \
+ pxor r4, r1; pand r2, r3; \
+ pxor r1, r3; pxor r0, r1; \
+ por r2, r1; pxor r3, r0; \
+ pxor r4, r1; \
+ pxor r1, r0;
+
+#define SBOX4(r0, r1, r2, r3, r4) \
+ pxor r3, r1; pxor RNOT, r3; \
+ pxor r3, r2; pxor r0, r3; \
+ movdqa r1, r4; pand r3, r1; \
+ pxor r2, r1; pxor r3, r4; \
+ pxor r4, r0; pand r4, r2; \
+ pxor r0, r2; pand r1, r0; \
+ pxor r0, r3; por r1, r4; \
+ pxor r0, r4; por r3, r0; \
+ pxor r2, r0; pand r3, r2; \
+ pxor RNOT, r0; pxor r2, r4;
+
+#define SBOX4_INVERSE(r0, r1, r2, r3, r4) \
+ movdqa r2, r4; pand r3, r2; \
+ pxor r1, r2; por r3, r1; \
+ pand r0, r1; pxor r2, r4; \
+ pxor r1, r4; pand r2, r1; \
+ pxor RNOT, r0; pxor r4, r3; \
+ pxor r3, r1; pand r0, r3; \
+ pxor r2, r3; pxor r1, r0; \
+ pand r0, r2; pxor r0, r3; \
+ pxor r4, r2; \
+ por r3, r2; pxor r0, r3; \
+ pxor r1, r2;
+
+#define SBOX5(r0, r1, r2, r3, r4) \
+ pxor r1, r0; pxor r3, r1; \
+ pxor RNOT, r3; movdqa r1, r4; \
+ pand r0, r1; pxor r3, r2; \
+ pxor r2, r1; por r4, r2; \
+ pxor r3, r4; pand r1, r3; \
+ pxor r0, r3; pxor r1, r4; \
+ pxor r2, r4; pxor r0, r2; \
+ pand r3, r0; pxor RNOT, r2; \
+ pxor r4, r0; por r3, r4; \
+ pxor r4, r2;
+
+#define SBOX5_INVERSE(r0, r1, r2, r3, r4) \
+ pxor RNOT, r1; movdqa r3, r4; \
+ pxor r1, r2; por r0, r3; \
+ pxor r2, r3; por r1, r2; \
+ pand r0, r2; pxor r3, r4; \
+ pxor r4, r2; por r0, r4; \
+ pxor r1, r4; pand r2, r1; \
+ pxor r3, r1; pxor r2, r4; \
+ pand r4, r3; pxor r1, r4; \
+ pxor r4, r3; pxor RNOT, r4; \
+ pxor r0, r3;
+
+#define SBOX6(r0, r1, r2, r3, r4) \
+ pxor RNOT, r2; movdqa r3, r4; \
+ pand r0, r3; pxor r4, r0; \
+ pxor r2, r3; por r4, r2; \
+ pxor r3, r1; pxor r0, r2; \
+ por r1, r0; pxor r1, r2; \
+ pxor r0, r4; por r3, r0; \
+ pxor r2, r0; pxor r3, r4; \
+ pxor r0, r4; pxor RNOT, r3; \
+ pand r4, r2; \
+ pxor r3, r2;
+
+#define SBOX6_INVERSE(r0, r1, r2, r3, r4) \
+ pxor r2, r0; movdqa r2, r4; \
+ pand r0, r2; pxor r3, r4; \
+ pxor RNOT, r2; pxor r1, r3; \
+ pxor r3, r2; por r0, r4; \
+ pxor r2, r0; pxor r4, r3; \
+ pxor r1, r4; pand r3, r1; \
+ pxor r0, r1; pxor r3, r0; \
+ por r2, r0; pxor r1, r3; \
+ pxor r0, r4;
+
+#define SBOX7(r0, r1, r2, r3, r4) \
+ movdqa r1, r4; por r2, r1; \
+ pxor r3, r1; pxor r2, r4; \
+ pxor r1, r2; por r4, r3; \
+ pand r0, r3; pxor r2, r4; \
+ pxor r1, r3; por r4, r1; \
+ pxor r0, r1; por r4, r0; \
+ pxor r2, r0; pxor r4, r1; \
+ pxor r1, r2; pand r0, r1; \
+ pxor r4, r1; pxor RNOT, r2; \
+ por r0, r2; \
+ pxor r2, r4;
+
+#define SBOX7_INVERSE(r0, r1, r2, r3, r4) \
+ movdqa r2, r4; pxor r0, r2; \
+ pand r3, r0; por r3, r4; \
+ pxor RNOT, r2; pxor r1, r3; \
+ por r0, r1; pxor r2, r0; \
+ pand r4, r2; pand r4, r3; \
+ pxor r2, r1; pxor r0, r2; \
+ por r2, r0; pxor r1, r4; \
+ pxor r3, r0; pxor r4, r3; \
+ por r0, r4; pxor r2, r3; \
+ pxor r2, r4;
+
+/* Apply SBOX number WHICH to the block. */
+#define SBOX(which, r0, r1, r2, r3, r4) \
+ SBOX##which (r0, r1, r2, r3, r4)
+
+/* Apply inverse SBOX number WHICH to the block. */
+#define SBOX_INVERSE(which, r0, r1, r2, r3, r4) \
+ SBOX##which##_INVERSE (r0, r1, r2, r3, r4)
+
+/* XOR round key into block state in r0,r1,r2,r3. r4 used as temporary. */
+#define BLOCK_XOR_KEY(r0, r1, r2, r3, r4, round) \
+ pbroadcastd ((ctx_keys + (round) * 16 + 0 * 4)(CTX), r4); \
+ pxor r4, r0; \
+ pbroadcastd ((ctx_keys + (round) * 16 + 1 * 4)(CTX), r4); \
+ pxor r4, r1; \
+ pbroadcastd ((ctx_keys + (round) * 16 + 2 * 4)(CTX), r4); \
+ pxor r4, r2; \
+ pbroadcastd ((ctx_keys + (round) * 16 + 3 * 4)(CTX), r4); \
+ pxor r4, r3;
+
+/* Apply the linear transformation to BLOCK. */
+#define LINEAR_TRANSFORMATION(r0, r1, r2, r3, r4) \
+ vec_rol(r0, 13, r4); \
+ vec_rol(r2, 3, r4); \
+ pxor r0, r1; \
+ pxor r2, r1; \
+ movdqa r0, r4; \
+ pslld $3, r4; \
+ pxor r2, r3; \
+ pxor r4, r3; \
+ vec_rol(r1, 1, r4); \
+ vec_rol(r3, 7, r4); \
+ pxor r1, r0; \
+ pxor r3, r0; \
+ movdqa r1, r4; \
+ pslld $7, r4; \
+ pxor r3, r2; \
+ pxor r4, r2; \
+ vec_rol(r0, 5, r4); \
+ vec_rol(r2, 22, r4);
+
+/* Apply the inverse linear transformation to BLOCK. */
+#define LINEAR_TRANSFORMATION_INVERSE(r0, r1, r2, r3, r4) \
+ vec_ror(r2, 22, r4); \
+ vec_ror(r0, 5, r4); \
+ movdqa r1, r4; \
+ pslld $7, r4; \
+ pxor r3, r2; \
+ pxor r4, r2; \
+ pxor r1, r0; \
+ pxor r3, r0; \
+ vec_ror(r3, 7, r4); \
+ vec_ror(r1, 1, r4); \
+ movdqa r0, r4; \
+ pslld $3, r4; \
+ pxor r2, r3; \
+ pxor r4, r3; \
+ pxor r0, r1; \
+ pxor r2, r1; \
+ vec_ror(r2, 3, r4); \
+ vec_ror(r0, 13, r4);
+
+/* Apply a Serpent round to eight parallel blocks.  The round number is
+   given by `round'. */
+#define ROUND(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
+ b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
+ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, round); \
+ SBOX (which, a0, a1, a2, a3, a4); \
+ BLOCK_XOR_KEY (b0, b1, b2, b3, b4, round); \
+ SBOX (which, b0, b1, b2, b3, b4); \
+ LINEAR_TRANSFORMATION (na0, na1, na2, na3, na4); \
+ LINEAR_TRANSFORMATION (nb0, nb1, nb2, nb3, nb4);
+
+/* Apply the last Serpent round to eight parallel blocks.  The round number
+   is given by `round'; the closing key XOR uses subkey `round' + 1. */
+#define ROUND_LAST(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
+ b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
+ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, round); \
+ SBOX (which, a0, a1, a2, a3, a4); \
+ BLOCK_XOR_KEY (b0, b1, b2, b3, b4, round); \
+ SBOX (which, b0, b1, b2, b3, b4); \
+ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, ((round) + 1)); \
+ BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, ((round) + 1));
+
+/* Apply an inverse Serpent round to eight parallel blocks.  The round
+   number is given by `round'. */
+#define ROUND_INVERSE(round, which, a0, a1, a2, a3, a4, \
+ na0, na1, na2, na3, na4, \
+ b0, b1, b2, b3, b4, \
+ nb0, nb1, nb2, nb3, nb4) \
+ LINEAR_TRANSFORMATION_INVERSE (a0, a1, a2, a3, a4); \
+ LINEAR_TRANSFORMATION_INVERSE (b0, b1, b2, b3, b4); \
+ SBOX_INVERSE (which, a0, a1, a2, a3, a4); \
+ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, round); \
+ SBOX_INVERSE (which, b0, b1, b2, b3, b4); \
+ BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round);
+
+/* Apply the first inverse Serpent round to eight parallel blocks.  The round
+   number is given by `round'; the opening key XOR uses subkey `round' + 1. */
+#define ROUND_FIRST_INVERSE(round, which, a0, a1, a2, a3, a4, \
+ na0, na1, na2, na3, na4, \
+ b0, b1, b2, b3, b4, \
+ nb0, nb1, nb2, nb3, nb4) \
+ BLOCK_XOR_KEY (a0, a1, a2, a3, a4, ((round) + 1)); \
+ BLOCK_XOR_KEY (b0, b1, b2, b3, b4, ((round) + 1)); \
+ SBOX_INVERSE (which, a0, a1, a2, a3, a4); \
+ BLOCK_XOR_KEY (na0, na1, na2, na3, na4, round); \
+ SBOX_INVERSE (which, b0, b1, b2, b3, b4); \
+ BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round);
+
+.text
+
+.align 8
+ELF(.type __serpent_enc_blk8,@function;)
+__serpent_enc_blk8:
+ /* input:
+ * %rdi: ctx, CTX
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext
+ * blocks
+ * output:
+ * RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: eight parallel
+ * ciphertext blocks
+ */
+ CFI_STARTPROC();
+
+ pcmpeqd RNOT, RNOT;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
+
+ ROUND (0, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
+ RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
+ ROUND (1, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
+ RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
+ ROUND (2, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
+ RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
+ ROUND (3, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
+ RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
+ ROUND (4, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
+ RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
+ ROUND (5, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
+ RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
+ ROUND (6, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
+ RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
+ ROUND (7, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
+ RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);
+ ROUND (8, 0, RA4, RA1, RA2, RA0, RA3, RA1, RA3, RA2, RA4, RA0,
+ RB4, RB1, RB2, RB0, RB3, RB1, RB3, RB2, RB4, RB0);
+ ROUND (9, 1, RA1, RA3, RA2, RA4, RA0, RA2, RA1, RA4, RA3, RA0,
+ RB1, RB3, RB2, RB4, RB0, RB2, RB1, RB4, RB3, RB0);
+ ROUND (10, 2, RA2, RA1, RA4, RA3, RA0, RA4, RA3, RA1, RA0, RA2,
+ RB2, RB1, RB4, RB3, RB0, RB4, RB3, RB1, RB0, RB2);
+ ROUND (11, 3, RA4, RA3, RA1, RA0, RA2, RA3, RA1, RA0, RA2, RA4,
+ RB4, RB3, RB1, RB0, RB2, RB3, RB1, RB0, RB2, RB4);
+ ROUND (12, 4, RA3, RA1, RA0, RA2, RA4, RA1, RA4, RA3, RA2, RA0,
+ RB3, RB1, RB0, RB2, RB4, RB1, RB4, RB3, RB2, RB0);
+ ROUND (13, 5, RA1, RA4, RA3, RA2, RA0, RA4, RA2, RA1, RA3, RA0,
+ RB1, RB4, RB3, RB2, RB0, RB4, RB2, RB1, RB3, RB0);
+ ROUND (14, 6, RA4, RA2, RA1, RA3, RA0, RA4, RA2, RA0, RA1, RA3,
+ RB4, RB2, RB1, RB3, RB0, RB4, RB2, RB0, RB1, RB3);
+ ROUND (15, 7, RA4, RA2, RA0, RA1, RA3, RA3, RA1, RA2, RA4, RA0,
+ RB4, RB2, RB0, RB1, RB3, RB3, RB1, RB2, RB4, RB0);
+ ROUND (16, 0, RA3, RA1, RA2, RA4, RA0, RA1, RA0, RA2, RA3, RA4,
+ RB3, RB1, RB2, RB4, RB0, RB1, RB0, RB2, RB3, RB4);
+ ROUND (17, 1, RA1, RA0, RA2, RA3, RA4, RA2, RA1, RA3, RA0, RA4,
+ RB1, RB0, RB2, RB3, RB4, RB2, RB1, RB3, RB0, RB4);
+ ROUND (18, 2, RA2, RA1, RA3, RA0, RA4, RA3, RA0, RA1, RA4, RA2,
+ RB2, RB1, RB3, RB0, RB4, RB3, RB0, RB1, RB4, RB2);
+ ROUND (19, 3, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA4, RA2, RA3,
+ RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB4, RB2, RB3);
+ ROUND (20, 4, RA0, RA1, RA4, RA2, RA3, RA1, RA3, RA0, RA2, RA4,
+ RB0, RB1, RB4, RB2, RB3, RB1, RB3, RB0, RB2, RB4);
+ ROUND (21, 5, RA1, RA3, RA0, RA2, RA4, RA3, RA2, RA1, RA0, RA4,
+ RB1, RB3, RB0, RB2, RB4, RB3, RB2, RB1, RB0, RB4);
+ ROUND (22, 6, RA3, RA2, RA1, RA0, RA4, RA3, RA2, RA4, RA1, RA0,
+ RB3, RB2, RB1, RB0, RB4, RB3, RB2, RB4, RB1, RB0);
+ ROUND (23, 7, RA3, RA2, RA4, RA1, RA0, RA0, RA1, RA2, RA3, RA4,
+ RB3, RB2, RB4, RB1, RB0, RB0, RB1, RB2, RB3, RB4);
+ ROUND (24, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
+ RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
+ ROUND (25, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
+ RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
+ ROUND (26, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
+ RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
+ ROUND (27, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
+ RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
+ ROUND (28, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
+ RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
+ ROUND (29, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
+ RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
+ ROUND (30, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
+ RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
+ ROUND_LAST (31, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
+ RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);
+
+ transpose_4x4(RA4, RA1, RA2, RA0, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __serpent_enc_blk8,.-__serpent_enc_blk8;)
+
+.align 8
+ELF(.type __serpent_dec_blk8,@function;)
+__serpent_dec_blk8:
+ /* input:
+ * %rdi: ctx, CTX
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
+ * ciphertext blocks
+ * output:
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext
+ * blocks
+ */
+ CFI_STARTPROC();
+
+ pcmpeqd RNOT, RNOT;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
+
+ ROUND_FIRST_INVERSE (31, 7, RA0, RA1, RA2, RA3, RA4,
+ RA3, RA0, RA1, RA4, RA2,
+ RB0, RB1, RB2, RB3, RB4,
+ RB3, RB0, RB1, RB4, RB2);
+ ROUND_INVERSE (30, 6, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA2, RA4, RA3,
+ RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB2, RB4, RB3);
+ ROUND_INVERSE (29, 5, RA0, RA1, RA2, RA4, RA3, RA1, RA3, RA4, RA2, RA0,
+ RB0, RB1, RB2, RB4, RB3, RB1, RB3, RB4, RB2, RB0);
+ ROUND_INVERSE (28, 4, RA1, RA3, RA4, RA2, RA0, RA1, RA2, RA4, RA0, RA3,
+ RB1, RB3, RB4, RB2, RB0, RB1, RB2, RB4, RB0, RB3);
+ ROUND_INVERSE (27, 3, RA1, RA2, RA4, RA0, RA3, RA4, RA2, RA0, RA1, RA3,
+ RB1, RB2, RB4, RB0, RB3, RB4, RB2, RB0, RB1, RB3);
+ ROUND_INVERSE (26, 2, RA4, RA2, RA0, RA1, RA3, RA2, RA3, RA0, RA1, RA4,
+ RB4, RB2, RB0, RB1, RB3, RB2, RB3, RB0, RB1, RB4);
+ ROUND_INVERSE (25, 1, RA2, RA3, RA0, RA1, RA4, RA4, RA2, RA1, RA0, RA3,
+ RB2, RB3, RB0, RB1, RB4, RB4, RB2, RB1, RB0, RB3);
+ ROUND_INVERSE (24, 0, RA4, RA2, RA1, RA0, RA3, RA4, RA3, RA2, RA0, RA1,
+ RB4, RB2, RB1, RB0, RB3, RB4, RB3, RB2, RB0, RB1);
+ ROUND_INVERSE (23, 7, RA4, RA3, RA2, RA0, RA1, RA0, RA4, RA3, RA1, RA2,
+ RB4, RB3, RB2, RB0, RB1, RB0, RB4, RB3, RB1, RB2);
+ ROUND_INVERSE (22, 6, RA0, RA4, RA3, RA1, RA2, RA4, RA3, RA2, RA1, RA0,
+ RB0, RB4, RB3, RB1, RB2, RB4, RB3, RB2, RB1, RB0);
+ ROUND_INVERSE (21, 5, RA4, RA3, RA2, RA1, RA0, RA3, RA0, RA1, RA2, RA4,
+ RB4, RB3, RB2, RB1, RB0, RB3, RB0, RB1, RB2, RB4);
+ ROUND_INVERSE (20, 4, RA3, RA0, RA1, RA2, RA4, RA3, RA2, RA1, RA4, RA0,
+ RB3, RB0, RB1, RB2, RB4, RB3, RB2, RB1, RB4, RB0);
+ ROUND_INVERSE (19, 3, RA3, RA2, RA1, RA4, RA0, RA1, RA2, RA4, RA3, RA0,
+ RB3, RB2, RB1, RB4, RB0, RB1, RB2, RB4, RB3, RB0);
+ ROUND_INVERSE (18, 2, RA1, RA2, RA4, RA3, RA0, RA2, RA0, RA4, RA3, RA1,
+ RB1, RB2, RB4, RB3, RB0, RB2, RB0, RB4, RB3, RB1);
+ ROUND_INVERSE (17, 1, RA2, RA0, RA4, RA3, RA1, RA1, RA2, RA3, RA4, RA0,
+ RB2, RB0, RB4, RB3, RB1, RB1, RB2, RB3, RB4, RB0);
+ ROUND_INVERSE (16, 0, RA1, RA2, RA3, RA4, RA0, RA1, RA0, RA2, RA4, RA3,
+ RB1, RB2, RB3, RB4, RB0, RB1, RB0, RB2, RB4, RB3);
+ ROUND_INVERSE (15, 7, RA1, RA0, RA2, RA4, RA3, RA4, RA1, RA0, RA3, RA2,
+ RB1, RB0, RB2, RB4, RB3, RB4, RB1, RB0, RB3, RB2);
+ ROUND_INVERSE (14, 6, RA4, RA1, RA0, RA3, RA2, RA1, RA0, RA2, RA3, RA4,
+ RB4, RB1, RB0, RB3, RB2, RB1, RB0, RB2, RB3, RB4);
+ ROUND_INVERSE (13, 5, RA1, RA0, RA2, RA3, RA4, RA0, RA4, RA3, RA2, RA1,
+ RB1, RB0, RB2, RB3, RB4, RB0, RB4, RB3, RB2, RB1);
+ ROUND_INVERSE (12, 4, RA0, RA4, RA3, RA2, RA1, RA0, RA2, RA3, RA1, RA4,
+ RB0, RB4, RB3, RB2, RB1, RB0, RB2, RB3, RB1, RB4);
+ ROUND_INVERSE (11, 3, RA0, RA2, RA3, RA1, RA4, RA3, RA2, RA1, RA0, RA4,
+ RB0, RB2, RB3, RB1, RB4, RB3, RB2, RB1, RB0, RB4);
+ ROUND_INVERSE (10, 2, RA3, RA2, RA1, RA0, RA4, RA2, RA4, RA1, RA0, RA3,
+ RB3, RB2, RB1, RB0, RB4, RB2, RB4, RB1, RB0, RB3);
+ ROUND_INVERSE (9, 1, RA2, RA4, RA1, RA0, RA3, RA3, RA2, RA0, RA1, RA4,
+ RB2, RB4, RB1, RB0, RB3, RB3, RB2, RB0, RB1, RB4);
+ ROUND_INVERSE (8, 0, RA3, RA2, RA0, RA1, RA4, RA3, RA4, RA2, RA1, RA0,
+ RB3, RB2, RB0, RB1, RB4, RB3, RB4, RB2, RB1, RB0);
+ ROUND_INVERSE (7, 7, RA3, RA4, RA2, RA1, RA0, RA1, RA3, RA4, RA0, RA2,
+ RB3, RB4, RB2, RB1, RB0, RB1, RB3, RB4, RB0, RB2);
+ ROUND_INVERSE (6, 6, RA1, RA3, RA4, RA0, RA2, RA3, RA4, RA2, RA0, RA1,
+ RB1, RB3, RB4, RB0, RB2, RB3, RB4, RB2, RB0, RB1);
+ ROUND_INVERSE (5, 5, RA3, RA4, RA2, RA0, RA1, RA4, RA1, RA0, RA2, RA3,
+ RB3, RB4, RB2, RB0, RB1, RB4, RB1, RB0, RB2, RB3);
+ ROUND_INVERSE (4, 4, RA4, RA1, RA0, RA2, RA3, RA4, RA2, RA0, RA3, RA1,
+ RB4, RB1, RB0, RB2, RB3, RB4, RB2, RB0, RB3, RB1);
+ ROUND_INVERSE (3, 3, RA4, RA2, RA0, RA3, RA1, RA0, RA2, RA3, RA4, RA1,
+ RB4, RB2, RB0, RB3, RB1, RB0, RB2, RB3, RB4, RB1);
+ ROUND_INVERSE (2, 2, RA0, RA2, RA3, RA4, RA1, RA2, RA1, RA3, RA4, RA0,
+ RB0, RB2, RB3, RB4, RB1, RB2, RB1, RB3, RB4, RB0);
+ ROUND_INVERSE (1, 1, RA2, RA1, RA3, RA4, RA0, RA0, RA2, RA4, RA3, RA1,
+ RB2, RB1, RB3, RB4, RB0, RB0, RB2, RB4, RB3, RB1);
+ ROUND_INVERSE (0, 0, RA0, RA2, RA4, RA3, RA1, RA0, RA1, RA2, RA3, RA4,
+ RB0, RB2, RB4, RB3, RB1, RB0, RB1, RB2, RB3, RB4);
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __serpent_dec_blk8,.-__serpent_dec_blk8;)
+
+.align 8
+.globl _gcry_serpent_sse2_ctr_enc
+ELF(.type _gcry_serpent_sse2_ctr_enc,@function;)
+_gcry_serpent_sse2_ctr_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ CFI_STARTPROC();
+
+ /* load IV and byteswap */
+ movdqu (%rcx), RA0;
+ movdqa RA0, RTMP0;
+ pbswap(RTMP0, RTMP1); /* be => le */
+
+ pcmpeqd RNOT, RNOT;
+ psrldq $8, RNOT; /* low: -1, high: 0 */
+ movdqa RNOT, RTMP2;
+ paddq RTMP2, RTMP2; /* low: -2, high: 0 */
+
+ /* construct IVs */
+ movdqa RTMP0, RTMP1;
+ psubq RNOT, RTMP0; /* +1 */
+ movdqa RTMP0, RA1;
+ psubq RTMP2, RTMP1; /* +2 */
+ movdqa RTMP1, RA2;
+ psubq RTMP2, RTMP0; /* +3 */
+ movdqa RTMP0, RA3;
+ psubq RTMP2, RTMP1; /* +4 */
+ movdqa RTMP1, RB0;
+ psubq RTMP2, RTMP0; /* +5 */
+ movdqa RTMP0, RB1;
+ psubq RTMP2, RTMP1; /* +6 */
+ movdqa RTMP1, RB2;
+ psubq RTMP2, RTMP0; /* +7 */
+ movdqa RTMP0, RB3;
+ psubq RTMP2, RTMP1; /* +8 */
+
+ /* check need for handling 64-bit overflow and carry */
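+	/* (The IV is big endian, so bytes 8..15 hold its low 64-bit half.  A
+	 * carry into the high half within the next 8 blocks is only possible
+	 * when the upper 32 bits of that half are all ones and its lower 32
+	 * bits are within 8 of wrapping.) */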
+ cmpl $0xffffffff, 8(%rcx);
+ jne .Lno_ctr_carry;
+
+ movl 12(%rcx), %eax;
+ bswapl %eax;
+ cmpl $-8, %eax;
+ jb .Lno_ctr_carry;
+ pslldq $8, RNOT; /* low: 0, high: -1 */
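+	/* (%eax determines which counters wrap; each .Lcarry_* entry below adds
+	 * the carry, psubq of -1 in the high qword, to its counter and falls
+	 * through so every later counter receives the carry as well.) */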
+ je .Lcarry_RTMP0;
+
+ cmpl $-6, %eax;
+ jb .Lcarry_RB3;
+ je .Lcarry_RB2;
+
+ cmpl $-4, %eax;
+ jb .Lcarry_RB1;
+ je .Lcarry_RB0;
+
+ cmpl $-2, %eax;
+ jb .Lcarry_RA3;
+ je .Lcarry_RA2;
+
+ psubq RNOT, RA1;
+.Lcarry_RA2:
+ psubq RNOT, RA2;
+.Lcarry_RA3:
+ psubq RNOT, RA3;
+.Lcarry_RB0:
+ psubq RNOT, RB0;
+.Lcarry_RB1:
+ psubq RNOT, RB1;
+.Lcarry_RB2:
+ psubq RNOT, RB2;
+.Lcarry_RB3:
+ psubq RNOT, RB3;
+.Lcarry_RTMP0:
+ psubq RNOT, RTMP1;
+
+.Lno_ctr_carry:
+ /* le => be */
+ pbswap(RA1, RTMP0);
+ pbswap(RA2, RTMP0);
+ pbswap(RA3, RTMP0);
+ pbswap(RB0, RTMP0);
+ pbswap(RB1, RTMP0);
+ pbswap(RB2, RTMP0);
+ pbswap(RB3, RTMP0);
+ pbswap(RTMP1, RTMP0);
+ /* store new IV */
+ movdqu RTMP1, (%rcx);
+
+ call __serpent_enc_blk8;
+
+ pxor_u((0 * 16)(%rdx), RA4, RTMP0);
+ pxor_u((1 * 16)(%rdx), RA1, RTMP0);
+ pxor_u((2 * 16)(%rdx), RA2, RTMP0);
+ pxor_u((3 * 16)(%rdx), RA0, RTMP0);
+ pxor_u((4 * 16)(%rdx), RB4, RTMP0);
+ pxor_u((5 * 16)(%rdx), RB1, RTMP0);
+ pxor_u((6 * 16)(%rdx), RB2, RTMP0);
+ pxor_u((7 * 16)(%rdx), RB0, RTMP0);
+
+ movdqu RA4, (0 * 16)(%rsi);
+ movdqu RA1, (1 * 16)(%rsi);
+ movdqu RA2, (2 * 16)(%rsi);
+ movdqu RA0, (3 * 16)(%rsi);
+ movdqu RB4, (4 * 16)(%rsi);
+ movdqu RB1, (5 * 16)(%rsi);
+ movdqu RB2, (6 * 16)(%rsi);
+ movdqu RB0, (7 * 16)(%rsi);
+
+ /* clear the used registers */
+ pxor RA0, RA0;
+ pxor RA1, RA1;
+ pxor RA2, RA2;
+ pxor RA3, RA3;
+ pxor RA4, RA4;
+ pxor RB0, RB0;
+ pxor RB1, RB1;
+ pxor RB2, RB2;
+ pxor RB3, RB3;
+ pxor RB4, RB4;
+ pxor RTMP0, RTMP0;
+ pxor RTMP1, RTMP1;
+ pxor RTMP2, RTMP2;
+ pxor RNOT, RNOT;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;)
+
+.align 8
+.globl _gcry_serpent_sse2_cbc_dec
+ELF(.type _gcry_serpent_sse2_cbc_dec,@function;)
+_gcry_serpent_sse2_cbc_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ movdqu (0 * 16)(%rdx), RA0;
+ movdqu (1 * 16)(%rdx), RA1;
+ movdqu (2 * 16)(%rdx), RA2;
+ movdqu (3 * 16)(%rdx), RA3;
+ movdqu (4 * 16)(%rdx), RB0;
+ movdqu (5 * 16)(%rdx), RB1;
+ movdqu (6 * 16)(%rdx), RB2;
+ movdqu (7 * 16)(%rdx), RB3;
+
+ call __serpent_dec_blk8;
+
+ movdqu (7 * 16)(%rdx), RNOT;
+ pxor_u((%rcx), RA0, RTMP0);
+ pxor_u((0 * 16)(%rdx), RA1, RTMP0);
+ pxor_u((1 * 16)(%rdx), RA2, RTMP0);
+ pxor_u((2 * 16)(%rdx), RA3, RTMP0);
+ pxor_u((3 * 16)(%rdx), RB0, RTMP0);
+ pxor_u((4 * 16)(%rdx), RB1, RTMP0);
+ pxor_u((5 * 16)(%rdx), RB2, RTMP0);
+ pxor_u((6 * 16)(%rdx), RB3, RTMP0);
+ movdqu RNOT, (%rcx); /* store new IV */
+
+ movdqu RA0, (0 * 16)(%rsi);
+ movdqu RA1, (1 * 16)(%rsi);
+ movdqu RA2, (2 * 16)(%rsi);
+ movdqu RA3, (3 * 16)(%rsi);
+ movdqu RB0, (4 * 16)(%rsi);
+ movdqu RB1, (5 * 16)(%rsi);
+ movdqu RB2, (6 * 16)(%rsi);
+ movdqu RB3, (7 * 16)(%rsi);
+
+ /* clear the used registers */
+ pxor RA0, RA0;
+ pxor RA1, RA1;
+ pxor RA2, RA2;
+ pxor RA3, RA3;
+ pxor RA4, RA4;
+ pxor RB0, RB0;
+ pxor RB1, RB1;
+ pxor RB2, RB2;
+ pxor RB3, RB3;
+ pxor RB4, RB4;
+ pxor RTMP0, RTMP0;
+ pxor RTMP1, RTMP1;
+ pxor RTMP2, RTMP2;
+ pxor RNOT, RNOT;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;)
+
+.align 8
+.globl _gcry_serpent_sse2_cfb_dec
+ELF(.type _gcry_serpent_sse2_cfb_dec,@function;)
+_gcry_serpent_sse2_cfb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ /* Load input */
+ movdqu (%rcx), RA0;
+ movdqu 0 * 16(%rdx), RA1;
+ movdqu 1 * 16(%rdx), RA2;
+ movdqu 2 * 16(%rdx), RA3;
+ movdqu 3 * 16(%rdx), RB0;
+ movdqu 4 * 16(%rdx), RB1;
+ movdqu 5 * 16(%rdx), RB2;
+ movdqu 6 * 16(%rdx), RB3;
+
+ /* Update IV */
+ movdqu 7 * 16(%rdx), RNOT;
+ movdqu RNOT, (%rcx);
+
+ call __serpent_enc_blk8;
+
+ pxor_u((0 * 16)(%rdx), RA4, RTMP0);
+ pxor_u((1 * 16)(%rdx), RA1, RTMP0);
+ pxor_u((2 * 16)(%rdx), RA2, RTMP0);
+ pxor_u((3 * 16)(%rdx), RA0, RTMP0);
+ pxor_u((4 * 16)(%rdx), RB4, RTMP0);
+ pxor_u((5 * 16)(%rdx), RB1, RTMP0);
+ pxor_u((6 * 16)(%rdx), RB2, RTMP0);
+ pxor_u((7 * 16)(%rdx), RB0, RTMP0);
+
+ movdqu RA4, (0 * 16)(%rsi);
+ movdqu RA1, (1 * 16)(%rsi);
+ movdqu RA2, (2 * 16)(%rsi);
+ movdqu RA0, (3 * 16)(%rsi);
+ movdqu RB4, (4 * 16)(%rsi);
+ movdqu RB1, (5 * 16)(%rsi);
+ movdqu RB2, (6 * 16)(%rsi);
+ movdqu RB0, (7 * 16)(%rsi);
+
+ /* clear the used registers */
+ pxor RA0, RA0;
+ pxor RA1, RA1;
+ pxor RA2, RA2;
+ pxor RA3, RA3;
+ pxor RA4, RA4;
+ pxor RB0, RB0;
+ pxor RB1, RB1;
+ pxor RB2, RB2;
+ pxor RB3, RB3;
+ pxor RB4, RB4;
+ pxor RTMP0, RTMP0;
+ pxor RTMP1, RTMP1;
+ pxor RTMP2, RTMP2;
+ pxor RNOT, RNOT;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;)
+
+.align 8
+.globl _gcry_serpent_sse2_ocb_enc
+ELF(.type _gcry_serpent_sse2_ocb_enc,@function;)
+
+_gcry_serpent_sse2_ocb_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[8])
+ */
+ CFI_STARTPROC();
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ movdqu (%rcx), RTMP0;
+ movdqu (%r8), RTMP1;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+
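+/* Per block: advance the offset in RTMP0 by L, accumulate the plaintext into
+ * the checksum in RTMP1, whiten the plaintext in xreg, and park the offset in
+ * the dst buffer so it can be XORed back in after __serpent_enc_blk8. */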
+#define OCB_INPUT(n, lreg, xreg) \
+ movdqu (n * 16)(%rdx), xreg; \
+ movdqu (lreg), RNOT; \
+ pxor RNOT, RTMP0; \
+ pxor xreg, RTMP1; \
+ pxor RTMP0, xreg; \
+ movdqu RTMP0, (n * 16)(%rsi);
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, RA0);
+ OCB_INPUT(1, %r11, RA1);
+ OCB_INPUT(2, %r12, RA2);
+ OCB_INPUT(3, %r13, RA3);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, RB0);
+ OCB_INPUT(5, %r11, RB1);
+ OCB_INPUT(6, %r12, RB2);
+ OCB_INPUT(7, %r13, RB3);
+#undef OCB_INPUT
+
+ movdqu RTMP0, (%rcx);
+ movdqu RTMP1, (%r8);
+
+ movq (0 * 8)(%rsp), %r10;
+ movq (1 * 8)(%rsp), %r11;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ call __serpent_enc_blk8;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ pxor_u((0 * 16)(%rsi), RA4, RTMP0);
+ pxor_u((1 * 16)(%rsi), RA1, RTMP0);
+ pxor_u((2 * 16)(%rsi), RA2, RTMP0);
+ pxor_u((3 * 16)(%rsi), RA0, RTMP0);
+ pxor_u((4 * 16)(%rsi), RB4, RTMP0);
+ pxor_u((5 * 16)(%rsi), RB1, RTMP0);
+ pxor_u((6 * 16)(%rsi), RB2, RTMP0);
+ pxor_u((7 * 16)(%rsi), RB0, RTMP0);
+
+ movdqu RA4, (0 * 16)(%rsi);
+ movdqu RA1, (1 * 16)(%rsi);
+ movdqu RA2, (2 * 16)(%rsi);
+ movdqu RA0, (3 * 16)(%rsi);
+ movdqu RB4, (4 * 16)(%rsi);
+ movdqu RB1, (5 * 16)(%rsi);
+ movdqu RB2, (6 * 16)(%rsi);
+ movdqu RB0, (7 * 16)(%rsi);
+
+ /* clear the used registers */
+ pxor RA0, RA0;
+ pxor RA1, RA1;
+ pxor RA2, RA2;
+ pxor RA3, RA3;
+ pxor RA4, RA4;
+ pxor RB0, RB0;
+ pxor RB1, RB1;
+ pxor RB2, RB2;
+ pxor RB3, RB3;
+ pxor RB4, RB4;
+ pxor RTMP0, RTMP0;
+ pxor RTMP1, RTMP1;
+ pxor RTMP2, RTMP2;
+ pxor RNOT, RNOT;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_serpent_sse2_ocb_enc,.-_gcry_serpent_sse2_ocb_enc;)
+
+.align 8
+.globl _gcry_serpent_sse2_ocb_dec
+ELF(.type _gcry_serpent_sse2_ocb_dec,@function;)
+
+_gcry_serpent_sse2_ocb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[8])
+ */
+ CFI_STARTPROC();
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ movdqu (%rcx), RTMP0;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+
+#define OCB_INPUT(n, lreg, xreg) \
+ movdqu (n * 16)(%rdx), xreg; \
+ movdqu (lreg), RNOT; \
+ pxor RNOT, RTMP0; \
+ pxor RTMP0, xreg; \
+ movdqu RTMP0, (n * 16)(%rsi);
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, RA0);
+ OCB_INPUT(1, %r11, RA1);
+ OCB_INPUT(2, %r12, RA2);
+ OCB_INPUT(3, %r13, RA3);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, RB0);
+ OCB_INPUT(5, %r11, RB1);
+ OCB_INPUT(6, %r12, RB2);
+ OCB_INPUT(7, %r13, RB3);
+#undef OCB_INPUT
+
+ movdqu RTMP0, (%rcx);
+
+ movq (0 * 8)(%rsp), %r10;
+ movq (1 * 8)(%rsp), %r11;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ call __serpent_dec_blk8;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ movdqu (%r8), RTMP0;
+
+ pxor_u((0 * 16)(%rsi), RA0, RTMP1);
+ pxor_u((1 * 16)(%rsi), RA1, RTMP1);
+ pxor_u((2 * 16)(%rsi), RA2, RTMP1);
+ pxor_u((3 * 16)(%rsi), RA3, RTMP1);
+ pxor_u((4 * 16)(%rsi), RB0, RTMP1);
+ pxor_u((5 * 16)(%rsi), RB1, RTMP1);
+ pxor_u((6 * 16)(%rsi), RB2, RTMP1);
+ pxor_u((7 * 16)(%rsi), RB3, RTMP1);
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+
+ movdqu RA0, (0 * 16)(%rsi);
+ pxor RA0, RTMP0;
+ movdqu RA1, (1 * 16)(%rsi);
+ pxor RA1, RTMP0;
+ movdqu RA2, (2 * 16)(%rsi);
+ pxor RA2, RTMP0;
+ movdqu RA3, (3 * 16)(%rsi);
+ pxor RA3, RTMP0;
+ movdqu RB0, (4 * 16)(%rsi);
+ pxor RB0, RTMP0;
+ movdqu RB1, (5 * 16)(%rsi);
+ pxor RB1, RTMP0;
+ movdqu RB2, (6 * 16)(%rsi);
+ pxor RB2, RTMP0;
+ movdqu RB3, (7 * 16)(%rsi);
+ pxor RB3, RTMP0;
+
+ movdqu RTMP0, (%r8);
+
+ /* clear the used registers */
+ pxor RA0, RA0;
+ pxor RA1, RA1;
+ pxor RA2, RA2;
+ pxor RA3, RA3;
+ pxor RA4, RA4;
+ pxor RB0, RB0;
+ pxor RB1, RB1;
+ pxor RB2, RB2;
+ pxor RB3, RB3;
+ pxor RB4, RB4;
+ pxor RTMP0, RTMP0;
+ pxor RTMP1, RTMP1;
+ pxor RTMP2, RTMP2;
+ pxor RNOT, RNOT;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_serpent_sse2_ocb_dec,.-_gcry_serpent_sse2_ocb_dec;)
+
+.align 8
+.globl _gcry_serpent_sse2_ocb_auth
+ELF(.type _gcry_serpent_sse2_ocb_auth,@function;)
+
+_gcry_serpent_sse2_ocb_auth:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: abuf (8 blocks)
+ * %rdx: offset
+ * %rcx: checksum
+ * %r8 : L pointers (void *L[8])
+ */
+ CFI_STARTPROC();
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ movdqu (%rdx), RTMP0;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+
+#define OCB_INPUT(n, lreg, xreg) \
+ movdqu (n * 16)(%rsi), xreg; \
+ movdqu (lreg), RNOT; \
+ pxor RNOT, RTMP0; \
+ pxor RTMP0, xreg;
+ movq (0 * 8)(%r8), %r10;
+ movq (1 * 8)(%r8), %r11;
+ movq (2 * 8)(%r8), %r12;
+ movq (3 * 8)(%r8), %r13;
+ OCB_INPUT(0, %r10, RA0);
+ OCB_INPUT(1, %r11, RA1);
+ OCB_INPUT(2, %r12, RA2);
+ OCB_INPUT(3, %r13, RA3);
+ movq (4 * 8)(%r8), %r10;
+ movq (5 * 8)(%r8), %r11;
+ movq (6 * 8)(%r8), %r12;
+ movq (7 * 8)(%r8), %r13;
+ OCB_INPUT(4, %r10, RB0);
+ OCB_INPUT(5, %r11, RB1);
+ OCB_INPUT(6, %r12, RB2);
+ OCB_INPUT(7, %r13, RB3);
+#undef OCB_INPUT
+
+ movdqu RTMP0, (%rdx);
+
+ movq (0 * 8)(%rsp), %r10;
+ movq (1 * 8)(%rsp), %r11;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ call __serpent_enc_blk8;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ movdqu (%rcx), RTMP0;
+ pxor RB4, RA4;
+ pxor RB1, RA1;
+ pxor RB2, RA2;
+ pxor RB0, RA0;
+
+ pxor RTMP0, RA2;
+ pxor RA4, RA1;
+ pxor RA2, RA0;
+
+ pxor RA1, RA0;
+ movdqu RA0, (%rcx);
+
+ /* clear the used registers */
+ pxor RA0, RA0;
+ pxor RA1, RA1;
+ pxor RA2, RA2;
+ pxor RA3, RA3;
+ pxor RA4, RA4;
+ pxor RB0, RB0;
+ pxor RB1, RB1;
+ pxor RB2, RB2;
+ pxor RB3, RB3;
+ pxor RB4, RB4;
+ pxor RTMP0, RTMP0;
+ pxor RTMP1, RTMP1;
+ pxor RTMP2, RTMP2;
+ pxor RNOT, RNOT;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_serpent_sse2_ocb_auth,.-_gcry_serpent_sse2_ocb_auth;)
+
+#endif /*defined(USE_SERPENT)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/serpent.c b/comm/third_party/libgcrypt/cipher/serpent.c
new file mode 100644
index 0000000000..3c5eed2c03
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/serpent.c
@@ -0,0 +1,1807 @@
+/* serpent.c - Implementation of the Serpent encryption algorithm.
+ * Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <config.h>
+
+#include <string.h>
+#include <stdio.h>
+
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+
+/* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
+#undef USE_SSE2
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSE2 1
+#endif
+
+/* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# if defined(ENABLE_AVX2_SUPPORT)
+# define USE_AVX2 1
+# endif
+#endif
+
+/* USE_NEON indicates whether to enable ARM NEON assembly code. */
+#undef USE_NEON
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_NEON)
+# define USE_NEON 1
+# endif
+#endif /*ENABLE_NEON_SUPPORT*/
+
+/* Number of rounds per Serpent encrypt/decrypt operation. */
+#define ROUNDS 32
+
+/* Magic number, used during generation of the subkeys. */
+#define PHI 0x9E3779B9
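+/* (PHI is the fractional part of the golden ratio scaled to 32 bits; the key
+   schedule derives each prekey word as w_i = (w_{i-8} ^ w_{i-5} ^ w_{i-3}
+   ^ w_{i-1} ^ PHI ^ i) rotated left by 11 bits.) */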
+
+/* Serpent works on 128-bit blocks. */
+typedef u32 serpent_block_t[4];
+
+/* Serpent key, provided by the user. If the original key is shorter
+ than 256 bits, it is padded. */
+typedef u32 serpent_key_t[8];
+
+/* The key schedule consists of 33 128-bit subkeys. */
+typedef u32 serpent_subkeys_t[ROUNDS + 1][4];
+
+/* A Serpent context. */
+typedef struct serpent_context
+{
+ serpent_subkeys_t keys; /* Generated subkeys. */
+
+#ifdef USE_AVX2
+ int use_avx2;
+#endif
+#ifdef USE_NEON
+ int use_neon;
+#endif
+} serpent_context_t;
+
+
+/* The assembly implementations use the SystemV ABI; on Win64 an ABI
+ * conversion and additional stack space to store XMM6-XMM15 are needed. */
+#undef ASM_FUNC_ABI
+#if defined(USE_SSE2) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# else
+# define ASM_FUNC_ABI
+# endif
+#endif
+
+
+#ifdef USE_SSE2
+/* Assembler implementations of Serpent using SSE2.  Process 8 blocks in
+ parallel.
+ */
+extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_ocb_enc(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[8]) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_ocb_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[8]) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_ocb_auth(serpent_context_t *ctx,
+ const unsigned char *abuf,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[8]) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_AVX2
+/* Assembler implementations of Serpent using AVX2.  Process 16 blocks in
+ parallel.
+ */
+extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_ocb_enc(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_ocb_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_ocb_auth(serpent_context_t *ctx,
+ const unsigned char *abuf,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_NEON
+/* Assembler implementations of Serpent using ARM NEON.  Process 8 blocks in
+ parallel.
+ */
+extern void _gcry_serpent_neon_ctr_enc(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *ctr);
+
+extern void _gcry_serpent_neon_cbc_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv);
+
+extern void _gcry_serpent_neon_cfb_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv);
+
+extern void _gcry_serpent_neon_ocb_enc(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const void *Ls[8]);
+
+extern void _gcry_serpent_neon_ocb_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const void *Ls[8]);
+
+extern void _gcry_serpent_neon_ocb_auth(serpent_context_t *ctx,
+ const unsigned char *abuf,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const void *Ls[8]);
+#endif
+
+
+/* Prototypes. */
+static const char *serpent_test (void);
+
+static void _gcry_serpent_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static void _gcry_serpent_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static void _gcry_serpent_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static size_t _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
+static size_t _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks);
+
+
+/*
+ * These are the S-Boxes of Serpent from the following research paper.
+ *
+ * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
+ * (New York, New York, USA), p. 317–329, National Institute of Standards and
+ * Technology, 2000.
+ *
+ * The paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
+ *
+ */
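+/* Each SBOX macro below is a branch-free Boolean-circuit formulation of one
+ 4-bit Serpent S-box. The four u32 inputs r0..r3 are bit-sliced: bit j of
+ (r0, r1, r2, r3) forms the j-th 4-bit S-box input, so a single invocation
+ applies the S-box to all 32 nibble positions in parallel. */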
+
+#define SBOX0(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r3 ^= r0; r4 = r1; \
+ r1 &= r3; r4 ^= r2; \
+ r1 ^= r0; r0 |= r3; \
+ r0 ^= r4; r4 ^= r3; \
+ r3 ^= r2; r2 |= r1; \
+ r2 ^= r4; r4 = ~r4; \
+ r4 |= r1; r1 ^= r3; \
+ r1 ^= r4; r3 |= r0; \
+ r1 ^= r3; r4 ^= r3; \
+ \
+ w = r1; x = r4; y = r2; z = r0; \
+ }
+
+#define SBOX0_INVERSE(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r2 = ~r2; r4 = r1; \
+ r1 |= r0; r4 = ~r4; \
+ r1 ^= r2; r2 |= r4; \
+ r1 ^= r3; r0 ^= r4; \
+ r2 ^= r0; r0 &= r3; \
+ r4 ^= r0; r0 |= r1; \
+ r0 ^= r2; r3 ^= r4; \
+ r2 ^= r1; r3 ^= r0; \
+ r3 ^= r1; \
+ r2 &= r3; \
+ r4 ^= r2; \
+ \
+ w = r0; x = r4; y = r1; z = r3; \
+ }
+
+#define SBOX1(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r0 = ~r0; r2 = ~r2; \
+ r4 = r0; r0 &= r1; \
+ r2 ^= r0; r0 |= r3; \
+ r3 ^= r2; r1 ^= r0; \
+ r0 ^= r4; r4 |= r1; \
+ r1 ^= r3; r2 |= r0; \
+ r2 &= r4; r0 ^= r1; \
+ r1 &= r2; \
+ r1 ^= r0; r0 &= r2; \
+ r0 ^= r4; \
+ \
+ w = r2; x = r0; y = r3; z = r1; \
+ }
+
+#define SBOX1_INVERSE(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r4 = r1; r1 ^= r3; \
+ r3 &= r1; r4 ^= r2; \
+ r3 ^= r0; r0 |= r1; \
+ r2 ^= r3; r0 ^= r4; \
+ r0 |= r2; r1 ^= r3; \
+ r0 ^= r1; r1 |= r3; \
+ r1 ^= r0; r4 = ~r4; \
+ r4 ^= r1; r1 |= r0; \
+ r1 ^= r0; \
+ r1 |= r4; \
+ r3 ^= r1; \
+ \
+ w = r4; x = r0; y = r3; z = r2; \
+ }
+
+#define SBOX2(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r4 = r0; r0 &= r2; \
+ r0 ^= r3; r2 ^= r1; \
+ r2 ^= r0; r3 |= r4; \
+ r3 ^= r1; r4 ^= r2; \
+ r1 = r3; r3 |= r4; \
+ r3 ^= r0; r0 &= r1; \
+ r4 ^= r0; r1 ^= r3; \
+ r1 ^= r4; r4 = ~r4; \
+ \
+ w = r2; x = r3; y = r1; z = r4; \
+ }
+
+#define SBOX2_INVERSE(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r2 ^= r3; r3 ^= r0; \
+ r4 = r3; r3 &= r2; \
+ r3 ^= r1; r1 |= r2; \
+ r1 ^= r4; r4 &= r3; \
+ r2 ^= r3; r4 &= r0; \
+ r4 ^= r2; r2 &= r1; \
+ r2 |= r0; r3 = ~r3; \
+ r2 ^= r3; r0 ^= r3; \
+ r0 &= r1; r3 ^= r4; \
+ r3 ^= r0; \
+ \
+ w = r1; x = r4; y = r2; z = r3; \
+ }
+
+#define SBOX3(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r4 = r0; r0 |= r3; \
+ r3 ^= r1; r1 &= r4; \
+ r4 ^= r2; r2 ^= r3; \
+ r3 &= r0; r4 |= r1; \
+ r3 ^= r4; r0 ^= r1; \
+ r4 &= r0; r1 ^= r3; \
+ r4 ^= r2; r1 |= r0; \
+ r1 ^= r2; r0 ^= r3; \
+ r2 = r1; r1 |= r3; \
+ r1 ^= r0; \
+ \
+ w = r1; x = r2; y = r3; z = r4; \
+ }
+
+#define SBOX3_INVERSE(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r4 = r2; r2 ^= r1; \
+ r0 ^= r2; r4 &= r2; \
+ r4 ^= r0; r0 &= r1; \
+ r1 ^= r3; r3 |= r4; \
+ r2 ^= r3; r0 ^= r3; \
+ r1 ^= r4; r3 &= r2; \
+ r3 ^= r1; r1 ^= r0; \
+ r1 |= r2; r0 ^= r3; \
+ r1 ^= r4; \
+ r0 ^= r1; \
+ \
+ w = r2; x = r1; y = r3; z = r0; \
+ }
+
+#define SBOX4(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r1 ^= r3; r3 = ~r3; \
+ r2 ^= r3; r3 ^= r0; \
+ r4 = r1; r1 &= r3; \
+ r1 ^= r2; r4 ^= r3; \
+ r0 ^= r4; r2 &= r4; \
+ r2 ^= r0; r0 &= r1; \
+ r3 ^= r0; r4 |= r1; \
+ r4 ^= r0; r0 |= r3; \
+ r0 ^= r2; r2 &= r3; \
+ r0 = ~r0; r4 ^= r2; \
+ \
+ w = r1; x = r4; y = r0; z = r3; \
+ }
+
+#define SBOX4_INVERSE(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r4 = r2; r2 &= r3; \
+ r2 ^= r1; r1 |= r3; \
+ r1 &= r0; r4 ^= r2; \
+ r4 ^= r1; r1 &= r2; \
+ r0 = ~r0; r3 ^= r4; \
+ r1 ^= r3; r3 &= r0; \
+ r3 ^= r2; r0 ^= r1; \
+ r2 &= r0; r3 ^= r0; \
+ r2 ^= r4; \
+ r2 |= r3; r3 ^= r0; \
+ r2 ^= r1; \
+ \
+ w = r0; x = r3; y = r2; z = r4; \
+ }
+
+#define SBOX5(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r0 ^= r1; r1 ^= r3; \
+ r3 = ~r3; r4 = r1; \
+ r1 &= r0; r2 ^= r3; \
+ r1 ^= r2; r2 |= r4; \
+ r4 ^= r3; r3 &= r1; \
+ r3 ^= r0; r4 ^= r1; \
+ r4 ^= r2; r2 ^= r0; \
+ r0 &= r3; r2 = ~r2; \
+ r0 ^= r4; r4 |= r3; \
+ r2 ^= r4; \
+ \
+ w = r1; x = r3; y = r0; z = r2; \
+ }
+
+#define SBOX5_INVERSE(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r1 = ~r1; r4 = r3; \
+ r2 ^= r1; r3 |= r0; \
+ r3 ^= r2; r2 |= r1; \
+ r2 &= r0; r4 ^= r3; \
+ r2 ^= r4; r4 |= r0; \
+ r4 ^= r1; r1 &= r2; \
+ r1 ^= r3; r4 ^= r2; \
+ r3 &= r4; r4 ^= r1; \
+ r3 ^= r4; r4 = ~r4; \
+ r3 ^= r0; \
+ \
+ w = r1; x = r4; y = r3; z = r2; \
+ }
+
+#define SBOX6(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r2 = ~r2; r4 = r3; \
+ r3 &= r0; r0 ^= r4; \
+ r3 ^= r2; r2 |= r4; \
+ r1 ^= r3; r2 ^= r0; \
+ r0 |= r1; r2 ^= r1; \
+ r4 ^= r0; r0 |= r3; \
+ r0 ^= r2; r4 ^= r3; \
+ r4 ^= r0; r3 = ~r3; \
+ r2 &= r4; \
+ r2 ^= r3; \
+ \
+ w = r0; x = r1; y = r4; z = r2; \
+ }
+
+#define SBOX6_INVERSE(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r0 ^= r2; r4 = r2; \
+ r2 &= r0; r4 ^= r3; \
+ r2 = ~r2; r3 ^= r1; \
+ r2 ^= r3; r4 |= r0; \
+ r0 ^= r2; r3 ^= r4; \
+ r4 ^= r1; r1 &= r3; \
+ r1 ^= r0; r0 ^= r3; \
+ r0 |= r2; r3 ^= r1; \
+ r4 ^= r0; \
+ \
+ w = r1; x = r2; y = r4; z = r3; \
+ }
+
+#define SBOX7(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r4 = r1; r1 |= r2; \
+ r1 ^= r3; r4 ^= r2; \
+ r2 ^= r1; r3 |= r4; \
+ r3 &= r0; r4 ^= r2; \
+ r3 ^= r1; r1 |= r4; \
+ r1 ^= r0; r0 |= r4; \
+ r0 ^= r2; r1 ^= r4; \
+ r2 ^= r1; r1 &= r0; \
+ r1 ^= r4; r2 = ~r2; \
+ r2 |= r0; \
+ r4 ^= r2; \
+ \
+ w = r4; x = r3; y = r1; z = r0; \
+ }
+
+#define SBOX7_INVERSE(r0, r1, r2, r3, w, x, y, z) \
+ { \
+ u32 r4; \
+ \
+ r4 = r2; r2 ^= r0; \
+ r0 &= r3; r4 |= r3; \
+ r2 = ~r2; r3 ^= r1; \
+ r1 |= r0; r0 ^= r2; \
+ r2 &= r4; r3 &= r4; \
+ r1 ^= r2; r2 ^= r0; \
+ r0 |= r2; r4 ^= r1; \
+ r0 ^= r3; r3 ^= r4; \
+ r4 |= r0; r3 ^= r2; \
+ r4 ^= r2; \
+ \
+ w = r3; x = r0; y = r1; z = r4; \
+ }
+
+/* XOR BLOCK1 into BLOCK0. */
+#define BLOCK_XOR(block0, block1) \
+ { \
+ block0[0] ^= block1[0]; \
+ block0[1] ^= block1[1]; \
+ block0[2] ^= block1[2]; \
+ block0[3] ^= block1[3]; \
+ }
+
+/* Copy BLOCK_SRC to BLOCK_DST. */
+#define BLOCK_COPY(block_dst, block_src) \
+ { \
+ block_dst[0] = block_src[0]; \
+ block_dst[1] = block_src[1]; \
+ block_dst[2] = block_src[2]; \
+ block_dst[3] = block_src[3]; \
+ }
+
+/* Apply SBOX number WHICH to the block found in ARRAY0, writing
+ the output to the block found in ARRAY1. */
+#define SBOX(which, array0, array1) \
+ SBOX##which (array0[0], array0[1], array0[2], array0[3], \
+ array1[0], array1[1], array1[2], array1[3]);
+
+/* Apply inverse SBOX number WHICH to the block found in ARRAY0, writing
+ the output to the block found in ARRAY1. */
+#define SBOX_INVERSE(which, array0, array1) \
+ SBOX##which##_INVERSE (array0[0], array0[1], array0[2], array0[3], \
+ array1[0], array1[1], array1[2], array1[3]);
+
+/* Apply the linear transformation to BLOCK. */
+#define LINEAR_TRANSFORMATION(block) \
+ { \
+ block[0] = rol (block[0], 13); \
+ block[2] = rol (block[2], 3); \
+ block[1] = block[1] ^ block[0] ^ block[2]; \
+ block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
+ block[1] = rol (block[1], 1); \
+ block[3] = rol (block[3], 7); \
+ block[0] = block[0] ^ block[1] ^ block[3]; \
+ block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
+ block[0] = rol (block[0], 5); \
+ block[2] = rol (block[2], 22); \
+ }
+
+/* Apply the inverse linear transformation to BLOCK. */
+#define LINEAR_TRANSFORMATION_INVERSE(block) \
+ { \
+ block[2] = ror (block[2], 22); \
+ block[0] = ror (block[0], 5); \
+ block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
+ block[0] = block[0] ^ block[1] ^ block[3]; \
+ block[3] = ror (block[3], 7); \
+ block[1] = ror (block[1], 1); \
+ block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
+ block[1] = block[1] ^ block[0] ^ block[2]; \
+ block[2] = ror (block[2], 3); \
+ block[0] = ror (block[0], 13); \
+ }
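+/* LINEAR_TRANSFORMATION_INVERSE undoes the steps of LINEAR_TRANSFORMATION in
+ reverse order, so applying the two macros back to back leaves a block
+ unchanged. A minimal sanity-check sketch, using only helpers defined in
+ this file:
+
+ u32 blk[4] = { 1, 2, 3, 4 };
+ u32 ref[4];
+ BLOCK_COPY (ref, blk);
+ LINEAR_TRANSFORMATION (blk);
+ LINEAR_TRANSFORMATION_INVERSE (blk);
+ -- here blk[i] == ref[i] for i = 0..3
+ */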
+
+/* Apply a Serpent round to BLOCK, using the SBOX number WHICH and the
+ subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary storage.
+ This macro increments `round'. */
+#define ROUND(which, subkeys, block, block_tmp) \
+ { \
+ BLOCK_XOR (block, subkeys[round]); \
+ round++; \
+ SBOX (which, block, block_tmp); \
+ LINEAR_TRANSFORMATION (block_tmp); \
+ BLOCK_COPY (block, block_tmp); \
+ }
+
+/* Apply the last Serpent round to BLOCK, using the SBOX number WHICH
+ and the subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary
+ storage. The result will be stored in BLOCK_TMP. This macro
+ increments `round'. */
+#define ROUND_LAST(which, subkeys, block, block_tmp) \
+ { \
+ BLOCK_XOR (block, subkeys[round]); \
+ round++; \
+ SBOX (which, block, block_tmp); \
+ BLOCK_XOR (block_tmp, subkeys[round]); \
+ round++; \
+ }
+
+/* Apply an inverse Serpent round to BLOCK, using the SBOX number
+ WHICH and the subkeys contained in SUBKEYS. Use BLOCK_TMP as
+ temporary storage. This macro decrements `round'. */
+#define ROUND_INVERSE(which, subkey, block, block_tmp) \
+ { \
+ LINEAR_TRANSFORMATION_INVERSE (block); \
+ SBOX_INVERSE (which, block, block_tmp); \
+ BLOCK_XOR (block_tmp, subkey[round]); \
+ round--; \
+ BLOCK_COPY (block, block_tmp); \
+ }
+
+/* Apply the first inverse Serpent round to BLOCK, using the SBOX number
+ WHICH and the subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary
+ storage. The result will be stored in BLOCK_TMP. This macro
+ decrements `round'. */
+#define ROUND_FIRST_INVERSE(which, subkeys, block, block_tmp) \
+ { \
+ BLOCK_XOR (block, subkeys[round]); \
+ round--; \
+ SBOX_INVERSE (which, block, block_tmp); \
+ BLOCK_XOR (block_tmp, subkeys[round]); \
+ round--; \
+ }
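+/* Taken together, encryption runs 31 ROUND invocations (cycling through
+ S-boxes 0..7) followed by one ROUND_LAST, consuming subkeys 0..32;
+ decryption starts with ROUND_FIRST_INVERSE on subkeys 32 and 31 and then
+ walks the inverse rounds back down to subkey 0. */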
+
+/* Convert the user provided key KEY of KEY_LENGTH bytes into the
+ internally used format. */
+static void
+serpent_key_prepare (const byte *key, unsigned int key_length,
+ serpent_key_t key_prepared)
+{
+ int i;
+
+ /* Copy key. */
+ key_length /= 4;
+ for (i = 0; i < key_length; i++)
+ key_prepared[i] = buf_get_le32 (key + i * 4);
+
+ if (i < 8)
+ {
+ /* Key must be padded according to the Serpent
+ specification. */
+ key_prepared[i] = 0x00000001;
+
+ for (i++; i < 8; i++)
+ key_prepared[i] = 0;
+ }
+}
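+/* For example, a 16-byte (128-bit) key k is expanded to the padded 256-bit
+ form { le32(k[0..3]), le32(k[4..7]), le32(k[8..11]), le32(k[12..15]),
+ 0x00000001, 0, 0, 0 }, as required by the Serpent specification. */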
+
+/* Derive the 33 subkeys from KEY and store them in SUBKEYS. */
+static void
+serpent_subkeys_generate (serpent_key_t key, serpent_subkeys_t subkeys)
+{
+ u32 w[8]; /* The `prekey'. */
+ u32 ws[4];
+ u32 wt[4];
+
+ /* Initialize with key values. */
+ w[0] = key[0];
+ w[1] = key[1];
+ w[2] = key[2];
+ w[3] = key[3];
+ w[4] = key[4];
+ w[5] = key[5];
+ w[6] = key[6];
+ w[7] = key[7];
+
+ /* Expand to intermediate key using the affine recurrence. */
+#define EXPAND_KEY4(wo, r) \
+ wo[0] = w[(r+0)%8] = \
+ rol (w[(r+0)%8] ^ w[(r+3)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ PHI ^ (r+0), 11); \
+ wo[1] = w[(r+1)%8] = \
+ rol (w[(r+1)%8] ^ w[(r+4)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ PHI ^ (r+1), 11); \
+ wo[2] = w[(r+2)%8] = \
+ rol (w[(r+2)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ w[(r+1)%8] ^ PHI ^ (r+2), 11); \
+ wo[3] = w[(r+3)%8] = \
+ rol (w[(r+3)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ w[(r+2)%8] ^ PHI ^ (r+3), 11);
+
+#define EXPAND_KEY(r) \
+ EXPAND_KEY4(ws, (r)); \
+ EXPAND_KEY4(wt, (r + 4));
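+/* EXPAND_KEY4 implements the Serpent affine recurrence
+ w_i = (w_{i-8} ^ w_{i-5} ^ w_{i-3} ^ w_{i-1} ^ PHI ^ i) <<< 11
+ on the circular 8-word prekey buffer W, producing four new prekey words
+ per invocation. */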
+
+ /* Calculate subkeys via S-Boxes, in bitslice mode. */
+ EXPAND_KEY (0); SBOX (3, ws, subkeys[0]); SBOX (2, wt, subkeys[1]);
+ EXPAND_KEY (8); SBOX (1, ws, subkeys[2]); SBOX (0, wt, subkeys[3]);
+ EXPAND_KEY (16); SBOX (7, ws, subkeys[4]); SBOX (6, wt, subkeys[5]);
+ EXPAND_KEY (24); SBOX (5, ws, subkeys[6]); SBOX (4, wt, subkeys[7]);
+ EXPAND_KEY (32); SBOX (3, ws, subkeys[8]); SBOX (2, wt, subkeys[9]);
+ EXPAND_KEY (40); SBOX (1, ws, subkeys[10]); SBOX (0, wt, subkeys[11]);
+ EXPAND_KEY (48); SBOX (7, ws, subkeys[12]); SBOX (6, wt, subkeys[13]);
+ EXPAND_KEY (56); SBOX (5, ws, subkeys[14]); SBOX (4, wt, subkeys[15]);
+ EXPAND_KEY (64); SBOX (3, ws, subkeys[16]); SBOX (2, wt, subkeys[17]);
+ EXPAND_KEY (72); SBOX (1, ws, subkeys[18]); SBOX (0, wt, subkeys[19]);
+ EXPAND_KEY (80); SBOX (7, ws, subkeys[20]); SBOX (6, wt, subkeys[21]);
+ EXPAND_KEY (88); SBOX (5, ws, subkeys[22]); SBOX (4, wt, subkeys[23]);
+ EXPAND_KEY (96); SBOX (3, ws, subkeys[24]); SBOX (2, wt, subkeys[25]);
+ EXPAND_KEY (104); SBOX (1, ws, subkeys[26]); SBOX (0, wt, subkeys[27]);
+ EXPAND_KEY (112); SBOX (7, ws, subkeys[28]); SBOX (6, wt, subkeys[29]);
+ EXPAND_KEY (120); SBOX (5, ws, subkeys[30]); SBOX (4, wt, subkeys[31]);
+ EXPAND_KEY4 (ws, 128); SBOX (3, ws, subkeys[32]);
+
+ wipememory (ws, sizeof (ws));
+ wipememory (wt, sizeof (wt));
+ wipememory (w, sizeof (w));
+}
+
+/* Initialize CONTEXT with the key KEY of KEY_LENGTH bytes. */
+static void
+serpent_setkey_internal (serpent_context_t *context,
+ const byte *key, unsigned int key_length)
+{
+ serpent_key_t key_prepared;
+
+ serpent_key_prepare (key, key_length, key_prepared);
+ serpent_subkeys_generate (key_prepared, context->keys);
+
+#ifdef USE_AVX2
+ context->use_avx2 = 0;
+ if ((_gcry_get_hw_features () & HWF_INTEL_AVX2))
+ {
+ context->use_avx2 = 1;
+ }
+#endif
+
+#ifdef USE_NEON
+ context->use_neon = 0;
+ if ((_gcry_get_hw_features () & HWF_ARM_NEON))
+ {
+ context->use_neon = 1;
+ }
+#endif
+
+ wipememory (key_prepared, sizeof(key_prepared));
+}
+
+/* Initialize CTX with the key KEY of KEY_LENGTH bytes. */
+static gcry_err_code_t
+serpent_setkey (void *ctx,
+ const byte *key, unsigned int key_length,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ serpent_context_t *context = ctx;
+ static const char *serpent_test_ret;
+ static int serpent_init_done;
+ gcry_err_code_t ret = GPG_ERR_NO_ERROR;
+
+ if (! serpent_init_done)
+ {
+ /* Execute a self-test the first time Serpent is used. */
+ serpent_init_done = 1;
+ serpent_test_ret = serpent_test ();
+ if (serpent_test_ret)
+ log_error ("Serpent test failure: %s\n", serpent_test_ret);
+ }
+
+ /* Setup bulk encryption routines. */
+ memset (bulk_ops, 0, sizeof(*bulk_ops));
+ bulk_ops->cbc_dec = _gcry_serpent_cbc_dec;
+ bulk_ops->cfb_dec = _gcry_serpent_cfb_dec;
+ bulk_ops->ctr_enc = _gcry_serpent_ctr_enc;
+ bulk_ops->ocb_crypt = _gcry_serpent_ocb_crypt;
+ bulk_ops->ocb_auth = _gcry_serpent_ocb_auth;
+
+ if (serpent_test_ret)
+ ret = GPG_ERR_SELFTEST_FAILED;
+ else
+ serpent_setkey_internal (context, key, key_length);
+
+ return ret;
+}
+
+static void
+serpent_encrypt_internal (serpent_context_t *context,
+ const byte *input, byte *output)
+{
+ serpent_block_t b, b_next;
+ int round = 0;
+
+ b[0] = buf_get_le32 (input + 0);
+ b[1] = buf_get_le32 (input + 4);
+ b[2] = buf_get_le32 (input + 8);
+ b[3] = buf_get_le32 (input + 12);
+
+ ROUND (0, context->keys, b, b_next);
+ ROUND (1, context->keys, b, b_next);
+ ROUND (2, context->keys, b, b_next);
+ ROUND (3, context->keys, b, b_next);
+ ROUND (4, context->keys, b, b_next);
+ ROUND (5, context->keys, b, b_next);
+ ROUND (6, context->keys, b, b_next);
+ ROUND (7, context->keys, b, b_next);
+ ROUND (0, context->keys, b, b_next);
+ ROUND (1, context->keys, b, b_next);
+ ROUND (2, context->keys, b, b_next);
+ ROUND (3, context->keys, b, b_next);
+ ROUND (4, context->keys, b, b_next);
+ ROUND (5, context->keys, b, b_next);
+ ROUND (6, context->keys, b, b_next);
+ ROUND (7, context->keys, b, b_next);
+ ROUND (0, context->keys, b, b_next);
+ ROUND (1, context->keys, b, b_next);
+ ROUND (2, context->keys, b, b_next);
+ ROUND (3, context->keys, b, b_next);
+ ROUND (4, context->keys, b, b_next);
+ ROUND (5, context->keys, b, b_next);
+ ROUND (6, context->keys, b, b_next);
+ ROUND (7, context->keys, b, b_next);
+ ROUND (0, context->keys, b, b_next);
+ ROUND (1, context->keys, b, b_next);
+ ROUND (2, context->keys, b, b_next);
+ ROUND (3, context->keys, b, b_next);
+ ROUND (4, context->keys, b, b_next);
+ ROUND (5, context->keys, b, b_next);
+ ROUND (6, context->keys, b, b_next);
+
+ ROUND_LAST (7, context->keys, b, b_next);
+
+ buf_put_le32 (output + 0, b_next[0]);
+ buf_put_le32 (output + 4, b_next[1]);
+ buf_put_le32 (output + 8, b_next[2]);
+ buf_put_le32 (output + 12, b_next[3]);
+}
+
+static void
+serpent_decrypt_internal (serpent_context_t *context,
+ const byte *input, byte *output)
+{
+ serpent_block_t b, b_next;
+ int round = ROUNDS;
+
+ b_next[0] = buf_get_le32 (input + 0);
+ b_next[1] = buf_get_le32 (input + 4);
+ b_next[2] = buf_get_le32 (input + 8);
+ b_next[3] = buf_get_le32 (input + 12);
+
+ ROUND_FIRST_INVERSE (7, context->keys, b_next, b);
+
+ ROUND_INVERSE (6, context->keys, b, b_next);
+ ROUND_INVERSE (5, context->keys, b, b_next);
+ ROUND_INVERSE (4, context->keys, b, b_next);
+ ROUND_INVERSE (3, context->keys, b, b_next);
+ ROUND_INVERSE (2, context->keys, b, b_next);
+ ROUND_INVERSE (1, context->keys, b, b_next);
+ ROUND_INVERSE (0, context->keys, b, b_next);
+ ROUND_INVERSE (7, context->keys, b, b_next);
+ ROUND_INVERSE (6, context->keys, b, b_next);
+ ROUND_INVERSE (5, context->keys, b, b_next);
+ ROUND_INVERSE (4, context->keys, b, b_next);
+ ROUND_INVERSE (3, context->keys, b, b_next);
+ ROUND_INVERSE (2, context->keys, b, b_next);
+ ROUND_INVERSE (1, context->keys, b, b_next);
+ ROUND_INVERSE (0, context->keys, b, b_next);
+ ROUND_INVERSE (7, context->keys, b, b_next);
+ ROUND_INVERSE (6, context->keys, b, b_next);
+ ROUND_INVERSE (5, context->keys, b, b_next);
+ ROUND_INVERSE (4, context->keys, b, b_next);
+ ROUND_INVERSE (3, context->keys, b, b_next);
+ ROUND_INVERSE (2, context->keys, b, b_next);
+ ROUND_INVERSE (1, context->keys, b, b_next);
+ ROUND_INVERSE (0, context->keys, b, b_next);
+ ROUND_INVERSE (7, context->keys, b, b_next);
+ ROUND_INVERSE (6, context->keys, b, b_next);
+ ROUND_INVERSE (5, context->keys, b, b_next);
+ ROUND_INVERSE (4, context->keys, b, b_next);
+ ROUND_INVERSE (3, context->keys, b, b_next);
+ ROUND_INVERSE (2, context->keys, b, b_next);
+ ROUND_INVERSE (1, context->keys, b, b_next);
+ ROUND_INVERSE (0, context->keys, b, b_next);
+
+ buf_put_le32 (output + 0, b_next[0]);
+ buf_put_le32 (output + 4, b_next[1]);
+ buf_put_le32 (output + 8, b_next[2]);
+ buf_put_le32 (output + 12, b_next[3]);
+}
+
+static unsigned int
+serpent_encrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
+{
+ serpent_context_t *context = ctx;
+
+ serpent_encrypt_internal (context, buffer_in, buffer_out);
+ return /*burn_stack*/ (2 * sizeof (serpent_block_t));
+}
+
+static unsigned int
+serpent_decrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
+{
+ serpent_context_t *context = ctx;
+
+ serpent_decrypt_internal (context, buffer_in, buffer_out);
+ return /*burn_stack*/ (2 * sizeof (serpent_block_t));
+}
+
+
+
+/* Bulk encryption of complete blocks in CTR mode. This function is only
+ intended for the bulk encryption feature of cipher.c. CTR is expected to be
+ of size sizeof(serpent_block_t). */
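+/* The bulk helpers below share one pattern: first drain as many blocks as
+ possible with the widest enabled implementation (AVX2, 16 blocks per call),
+ then with the 8-block SSE2/NEON code, and finally fall back to the generic
+ one-block-at-a-time loop for whatever remains. */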
+static void
+_gcry_serpent_ctr_enc(void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ serpent_context_t *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char tmpbuf[sizeof(serpent_block_t)];
+ int burn_stack_depth = 2 * sizeof (serpent_block_t);
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ {
+ int did_use_avx2 = 0;
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_serpent_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 16;
+ outbuf += 16 * sizeof(serpent_block_t);
+ inbuf += 16 * sizeof(serpent_block_t);
+ did_use_avx2 = 1;
+ }
+
+ if (did_use_avx2)
+ {
+ /* serpent-avx2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic/sse2 code to handle smaller chunks... */
+ /* TODO: use caching instead? */
+ }
+#endif
+
+#ifdef USE_SSE2
+ {
+ int did_use_sse2 = 0;
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_serpent_sse2_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_sse2 = 1;
+ }
+
+ if (did_use_sse2)
+ {
+ /* serpent-sse2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ /* TODO: use caching instead? */
+ }
+#endif
+
+#ifdef USE_NEON
+ if (ctx->use_neon)
+ {
+ int did_use_neon = 0;
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_serpent_neon_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_neon = 1;
+ }
+
+ if (did_use_neon)
+ {
+ /* serpent-neon assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ /* TODO: use caching instead? */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* Encrypt the counter. */
+ serpent_encrypt_internal(ctx, ctr, tmpbuf);
+ /* XOR the input with the encrypted counter and store in output. */
+ cipher_block_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t));
+ outbuf += sizeof(serpent_block_t);
+ inbuf += sizeof(serpent_block_t);
+ /* Increment the counter. */
+ cipher_block_add(ctr, 1, sizeof(serpent_block_t));
+ }
+
+ wipememory(tmpbuf, sizeof(tmpbuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CBC mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_serpent_cbc_dec(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ serpent_context_t *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char savebuf[sizeof(serpent_block_t)];
+ int burn_stack_depth = 2 * sizeof (serpent_block_t);
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ {
+ int did_use_avx2 = 0;
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_serpent_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 16;
+ outbuf += 16 * sizeof(serpent_block_t);
+ inbuf += 16 * sizeof(serpent_block_t);
+ did_use_avx2 = 1;
+ }
+
+ if (did_use_avx2)
+ {
+ /* serpent-avx2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic/sse2 code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_SSE2
+ {
+ int did_use_sse2 = 0;
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_serpent_sse2_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_sse2 = 1;
+ }
+
+ if (did_use_sse2)
+ {
+ /* serpent-sse2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_NEON
+ if (ctx->use_neon)
+ {
+ int did_use_neon = 0;
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_serpent_neon_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_neon = 1;
+ }
+
+ if (did_use_neon)
+ {
+ /* serpent-neon assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* INBUF is needed later and it may be identical to OUTBUF, so store
+ the intermediate result to SAVEBUF. */
+ serpent_decrypt_internal (ctx, inbuf, savebuf);
+
+ cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf,
+ sizeof(serpent_block_t));
+ inbuf += sizeof(serpent_block_t);
+ outbuf += sizeof(serpent_block_t);
+ }
+
+ wipememory(savebuf, sizeof(savebuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CFB mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_serpent_cfb_dec(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ serpent_context_t *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ int burn_stack_depth = 2 * sizeof (serpent_block_t);
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ {
+ int did_use_avx2 = 0;
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_serpent_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 16;
+ outbuf += 16 * sizeof(serpent_block_t);
+ inbuf += 16 * sizeof(serpent_block_t);
+ did_use_avx2 = 1;
+ }
+
+ if (did_use_avx2)
+ {
+ /* serpent-avx2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic/sse2 code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_SSE2
+ {
+ int did_use_sse2 = 0;
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_serpent_sse2_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_sse2 = 1;
+ }
+
+ if (did_use_sse2)
+ {
+ /* serpent-sse2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_NEON
+ if (ctx->use_neon)
+ {
+ int did_use_neon = 0;
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_serpent_neon_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_neon = 1;
+ }
+
+ if (did_use_neon)
+ {
+ /* serpent-neon assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ serpent_encrypt_internal(ctx, iv, iv);
+ cipher_block_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t));
+ outbuf += sizeof(serpent_block_t);
+ inbuf += sizeof(serpent_block_t);
+ }
+
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk encryption/decryption of complete blocks in OCB mode. */
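+/* For the OCB paths the Ls[] table is pre-filled with the offsets
+ L[0], L[1], L[0], L[2], ... following the number-of-trailing-zeros pattern
+ of the block counter; only the final slot of each 8- or 16-block chunk
+ depends on the running block number and is refreshed via ocb_get_l(). */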
+static size_t
+_gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
+ serpent_context_t *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ int burn_stack_depth = 2 * sizeof (serpent_block_t);
+ u64 blkn = c->u_mode.ocb.data_nblocks;
+#else
+ (void)c;
+ (void)outbuf_arg;
+ (void)inbuf_arg;
+ (void)encrypt;
+#endif
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ {
+ int did_use_avx2 = 0;
+ u64 Ls[16];
+ unsigned int n = 16 - (blkn % 16);
+ u64 *l;
+ int i;
+
+ if (nblocks >= 16)
+ {
+ for (i = 0; i < 16; i += 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ }
+
+ Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ l = &Ls[(15 + n) % 16];
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ blkn += 16;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+ if (encrypt)
+ _gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+ else
+ _gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+
+ nblocks -= 16;
+ outbuf += 16 * sizeof(serpent_block_t);
+ inbuf += 16 * sizeof(serpent_block_t);
+ did_use_avx2 = 1;
+ }
+ }
+
+ if (did_use_avx2)
+ {
+ /* serpent-avx2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_SSE2
+ {
+ int did_use_sse2 = 0;
+ u64 Ls[8];
+ unsigned int n = 8 - (blkn % 8);
+ u64 *l;
+
+ if (nblocks >= 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ l = &Ls[(7 + n) % 8];
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ blkn += 8;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
+
+ if (encrypt)
+ _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+ else
+ _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_sse2 = 1;
+ }
+ }
+
+ if (did_use_sse2)
+ {
+ /* serpent-sse2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_NEON
+ if (ctx->use_neon)
+ {
+ int did_use_neon = 0;
+ const void *Ls[8];
+ unsigned int n = 8 - (blkn % 8);
+ const void **l;
+
+ if (nblocks >= 8)
+ {
+ Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+ Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+ Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+ Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+ l = &Ls[(7 + n) % 8];
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ blkn += 8;
+ *l = ocb_get_l(c, blkn - blkn % 8);
+
+ if (encrypt)
+ _gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+ else
+ _gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_neon = 1;
+ }
+ }
+
+ if (did_use_neon)
+ {
+ /* serpent-neon assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
+ c->u_mode.ocb.data_nblocks = blkn;
+
+ if (burn_stack_depth)
+ _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#endif
+
+ return nblocks;
+}
+
+/* Bulk authentication of complete blocks in OCB mode. */
+static size_t
+_gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks)
+{
+#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
+ serpent_context_t *ctx = (void *)&c->context.c;
+ const unsigned char *abuf = abuf_arg;
+ int burn_stack_depth = 2 * sizeof(serpent_block_t);
+ u64 blkn = c->u_mode.ocb.aad_nblocks;
+#else
+ (void)c;
+ (void)abuf_arg;
+#endif
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ {
+ int did_use_avx2 = 0;
+ u64 Ls[16];
+ unsigned int n = 16 - (blkn % 16);
+ u64 *l;
+ int i;
+
+ if (nblocks >= 16)
+ {
+ for (i = 0; i < 16; i += 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ }
+
+ Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ l = &Ls[(15 + n) % 16];
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ blkn += 16;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+ _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum, Ls);
+
+ nblocks -= 16;
+ abuf += 16 * sizeof(serpent_block_t);
+ did_use_avx2 = 1;
+ }
+ }
+
+ if (did_use_avx2)
+ {
+ /* serpent-avx2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_SSE2
+ {
+ int did_use_sse2 = 0;
+ u64 Ls[8];
+ unsigned int n = 8 - (blkn % 8);
+ u64 *l;
+
+ if (nblocks >= 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ l = &Ls[(7 + n) % 8];
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ blkn += 8;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
+
+ _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum, Ls);
+
+ nblocks -= 8;
+ abuf += 8 * sizeof(serpent_block_t);
+ did_use_sse2 = 1;
+ }
+ }
+
+ if (did_use_sse2)
+ {
+ /* serpent-sse2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_NEON
+ if (ctx->use_neon)
+ {
+ int did_use_neon = 0;
+ const void *Ls[8];
+ unsigned int n = 8 - (blkn % 8);
+ const void **l;
+
+ if (nblocks >= 8)
+ {
+ Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+ Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+ Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+ Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+ l = &Ls[(7 + n) % 8];
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ blkn += 8;
+ *l = ocb_get_l(c, blkn - blkn % 8);
+
+ _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum, Ls);
+
+ nblocks -= 8;
+ abuf += 8 * sizeof(serpent_block_t);
+ did_use_neon = 1;
+ }
+ }
+
+ if (did_use_neon)
+ {
+ /* serpent-neon assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
+ c->u_mode.ocb.aad_nblocks = blkn;
+
+ if (burn_stack_depth)
+ _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#endif
+
+ return nblocks;
+}
+
+
+
+/* Run the self-tests for SERPENT-CTR-128, tests IV increment of bulk CTR
+ encryption. Returns NULL on success. */
+static const char*
+selftest_ctr_128 (void)
+{
+ const int nblocks = 16+8+1;
+ const int blocksize = sizeof(serpent_block_t);
+ const int context_size = sizeof(serpent_context_t);
+
+ return _gcry_selftest_helper_ctr("SERPENT", &serpent_setkey,
+ &serpent_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for SERPENT-CBC-128, tests bulk CBC decryption.
+ Returns NULL on success. */
+static const char*
+selftest_cbc_128 (void)
+{
+ const int nblocks = 16+8+2;
+ const int blocksize = sizeof(serpent_block_t);
+ const int context_size = sizeof(serpent_context_t);
+
+ return _gcry_selftest_helper_cbc("SERPENT", &serpent_setkey,
+ &serpent_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for SERPENT-CFB-128, tests bulk CFB decryption.
+ Returns NULL on success. */
+static const char*
+selftest_cfb_128 (void)
+{
+ const int nblocks = 16+8+2;
+ const int blocksize = sizeof(serpent_block_t);
+ const int context_size = sizeof(serpent_context_t);
+
+ return _gcry_selftest_helper_cfb("SERPENT", &serpent_setkey,
+ &serpent_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Serpent test. */
+
+static const char *
+serpent_test (void)
+{
+ serpent_context_t context;
+ unsigned char scratch[16];
+ unsigned int i;
+ const char *r;
+
+ static struct test
+ {
+ int key_length;
+ unsigned char key[32];
+ unsigned char text_plain[16];
+ unsigned char text_cipher[16];
+ } test_data[] =
+ {
+ {
+ 16,
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+ "\xD2\x9D\x57\x6F\xCE\xA3\xA3\xA7\xED\x90\x99\xF2\x92\x73\xD7\x8E",
+ "\xB2\x28\x8B\x96\x8A\xE8\xB0\x86\x48\xD1\xCE\x96\x06\xFD\x99\x2D"
+ },
+ {
+ 24,
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ "\xD2\x9D\x57\x6F\xCE\xAB\xA3\xA7\xED\x98\x99\xF2\x92\x7B\xD7\x8E",
+ "\x13\x0E\x35\x3E\x10\x37\xC2\x24\x05\xE8\xFA\xEF\xB2\xC3\xC3\xE9"
+ },
+ {
+ 32,
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+ "\xD0\x95\x57\x6F\xCE\xA3\xE3\xA7\xED\x98\xD9\xF2\x90\x73\xD7\x8E",
+ "\xB9\x0E\xE5\x86\x2D\xE6\x91\x68\xF2\xBD\xD5\x12\x5B\x45\x47\x2B"
+ },
+ {
+ 32,
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+ "\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00",
+ "\x20\x61\xA4\x27\x82\xBD\x52\xEC\x69\x1E\xC3\x83\xB0\x3B\xA7\x7C"
+ },
+ {
+ 0
+ },
+ };
+
+ for (i = 0; test_data[i].key_length; i++)
+ {
+ serpent_setkey_internal (&context, test_data[i].key,
+ test_data[i].key_length);
+ serpent_encrypt_internal (&context, test_data[i].text_plain, scratch);
+
+ if (memcmp (scratch, test_data[i].text_cipher, sizeof (serpent_block_t)))
+ switch (test_data[i].key_length)
+ {
+ case 16:
+ return "Serpent-128 test encryption failed.";
+ case 24:
+ return "Serpent-192 test encryption failed.";
+ case 32:
+ return "Serpent-256 test encryption failed.";
+ }
+
+ serpent_decrypt_internal (&context, test_data[i].text_cipher, scratch);
+ if (memcmp (scratch, test_data[i].text_plain, sizeof (serpent_block_t)))
+ switch (test_data[i].key_length)
+ {
+ case 16:
+ return "Serpent-128 test decryption failed.";
+ case 24:
+ return "Serpent-192 test decryption failed.";
+ case 32:
+ return "Serpent-256 test decryption failed.";
+ }
+ }
+
+ if ( (r = selftest_ctr_128 ()) )
+ return r;
+
+ if ( (r = selftest_cbc_128 ()) )
+ return r;
+
+ if ( (r = selftest_cfb_128 ()) )
+ return r;
+
+ return NULL;
+}
+
+
+static gcry_cipher_oid_spec_t serpent128_oids[] =
+ {
+ {"1.3.6.1.4.1.11591.13.2.1", GCRY_CIPHER_MODE_ECB },
+ {"1.3.6.1.4.1.11591.13.2.2", GCRY_CIPHER_MODE_CBC },
+ {"1.3.6.1.4.1.11591.13.2.3", GCRY_CIPHER_MODE_OFB },
+ {"1.3.6.1.4.1.11591.13.2.4", GCRY_CIPHER_MODE_CFB },
+ { NULL }
+ };
+
+static gcry_cipher_oid_spec_t serpent192_oids[] =
+ {
+ {"1.3.6.1.4.1.11591.13.2.21", GCRY_CIPHER_MODE_ECB },
+ {"1.3.6.1.4.1.11591.13.2.22", GCRY_CIPHER_MODE_CBC },
+ {"1.3.6.1.4.1.11591.13.2.23", GCRY_CIPHER_MODE_OFB },
+ {"1.3.6.1.4.1.11591.13.2.24", GCRY_CIPHER_MODE_CFB },
+ { NULL }
+ };
+
+static gcry_cipher_oid_spec_t serpent256_oids[] =
+ {
+ {"1.3.6.1.4.1.11591.13.2.41", GCRY_CIPHER_MODE_ECB },
+ {"1.3.6.1.4.1.11591.13.2.42", GCRY_CIPHER_MODE_CBC },
+ {"1.3.6.1.4.1.11591.13.2.43", GCRY_CIPHER_MODE_OFB },
+ {"1.3.6.1.4.1.11591.13.2.44", GCRY_CIPHER_MODE_CFB },
+ { NULL }
+ };
+
+static const char *serpent128_aliases[] =
+ {
+ "SERPENT",
+ "SERPENT-128",
+ NULL
+ };
+static const char *serpent192_aliases[] =
+ {
+ "SERPENT-192",
+ NULL
+ };
+static const char *serpent256_aliases[] =
+ {
+ "SERPENT-256",
+ NULL
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_serpent128 =
+ {
+ GCRY_CIPHER_SERPENT128, {0, 0},
+ "SERPENT128", serpent128_aliases, serpent128_oids, 16, 128,
+ sizeof (serpent_context_t),
+ serpent_setkey, serpent_encrypt, serpent_decrypt
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_serpent192 =
+ {
+ GCRY_CIPHER_SERPENT192, {0, 0},
+ "SERPENT192", serpent192_aliases, serpent192_oids, 16, 192,
+ sizeof (serpent_context_t),
+ serpent_setkey, serpent_encrypt, serpent_decrypt
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_serpent256 =
+ {
+ GCRY_CIPHER_SERPENT256, {0, 0},
+ "SERPENT256", serpent256_aliases, serpent256_oids, 16, 256,
+ sizeof (serpent_context_t),
+ serpent_setkey, serpent_encrypt, serpent_decrypt
+ };
diff --git a/comm/third_party/libgcrypt/cipher/sha1-armv7-neon.S b/comm/third_party/libgcrypt/cipher/sha1-armv7-neon.S
new file mode 100644
index 0000000000..61cc541c68
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha1-armv7-neon.S
@@ -0,0 +1,526 @@
+/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function
+ * Copyright (C) 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Based on sha1.c:
+ * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_NEON) && defined(USE_SHA1)
+
+.syntax unified
+.fpu neon
+.arm
+
+.text
+
+#ifdef __PIC__
+# define GET_DATA_POINTER(reg, name, rtmp) \
+ ldr reg, 1f; \
+ ldr rtmp, 2f; \
+ b 3f; \
+ 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+ 2: .word name(GOT); \
+ 3: add reg, pc, reg; \
+ ldr reg, [reg, rtmp];
+#else
+# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+#define K1 0x5A827999
+#define K2 0x6ED9EBA1
+#define K3 0x8F1BBCDC
+#define K4 0xCA62C1D6
+.align 4
+gcry_sha1_armv7_neon_K_VEC:
+.LK_VEC:
+.LK1: .long K1, K1, K1, K1
+.LK2: .long K2, K2, K2, K2
+.LK3: .long K3, K3, K3, K3
+.LK4: .long K4, K4, K4, K4
+
+
+/* Register macros */
+
+#define RSTATE r0
+#define RDATA r1
+#define RNBLKS r2
+#define ROLDSTACK r3
+#define RWK lr
+
+#define _a r4
+#define _b r5
+#define _c r6
+#define _d r7
+#define _e r8
+
+#define RT0 r9
+#define RT1 r10
+#define RT2 r11
+#define RT3 r12
+
+#define W0 q0
+#define W1 q1
+#define W2 q2
+#define W3 q3
+#define W4 q4
+#define W5 q5
+#define W6 q6
+#define W7 q7
+
+#define tmp0 q8
+#define tmp1 q9
+#define tmp2 q10
+#define tmp3 q11
+
+#define qK1 q12
+#define qK2 q13
+#define qK3 q14
+#define qK4 q15
+
+
+/* Round function macros. */
+
+#define WK_offs(i) (((i) & 15) * 4)
+
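+/* F1, F2 and F3 are the standard SHA-1 round functions: F1 is the choice
+ function (b & c) | (~b & d) for rounds 0-19 (computed here as two disjoint
+ terms that are added), F2 is the parity b ^ c ^ d for rounds 20-39 and
+ 60-79 (F4 aliases it), and F3 is the majority function for rounds 40-59. */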
+#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ ldr RT3, [sp, WK_offs(i)]; \
+ pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ bic RT0, d, b; \
+ add e, e, a, ror #(32 - 5); \
+ and RT1, c, b; \
+ pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add RT0, RT0, RT3; \
+ add e, e, RT1; \
+ ror b, #(32 - 30); \
+ pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT0;
+
+#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ ldr RT3, [sp, WK_offs(i)]; \
+ pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ eor RT0, d, b; \
+ add e, e, a, ror #(32 - 5); \
+ eor RT0, RT0, c; \
+ pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT3; \
+ ror b, #(32 - 30); \
+ pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT0; \
+
+#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ ldr RT3, [sp, WK_offs(i)]; \
+ pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ eor RT0, b, c; \
+ and RT1, b, c; \
+ add e, e, a, ror #(32 - 5); \
+ pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ and RT0, RT0, d; \
+ add RT1, RT1, RT3; \
+ add e, e, RT0; \
+ ror b, #(32 - 30); \
+ pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT1;
+
+#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define R(a,b,c,d,e,f,i) \
+ _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define dummy(...)
+
+
+/* Input expansion macros. */
+
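+/* The expansion macros vectorize the SHA-1 message schedule four words at a
+ time. Rounds 16-31 use the usual recurrence
+ W[t] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1),
+ with an extra fix-up step for the in-group dependency on W[t-3]; rounds
+ 32-79 use the equivalent form
+ W[t] = rol(W[t-6] ^ W[t-16] ^ W[t-28] ^ W[t-32], 2),
+ which has no dependency inside a 4-word group. */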
+/********* Precalc macros for rounds 0-15 *************************************/
+
+#define W_PRECALC_00_15() \
+ add RWK, sp, #(WK_offs(0)); \
+ \
+ vld1.32 {tmp0, tmp1}, [RDATA]!; \
+ vrev32.8 W0, tmp0; /* big => little */ \
+ vld1.32 {tmp2, tmp3}, [RDATA]!; \
+ vadd.u32 tmp0, W0, curK; \
+ vrev32.8 W7, tmp1; /* big => little */ \
+ vrev32.8 W6, tmp2; /* big => little */ \
+ vadd.u32 tmp1, W7, curK; \
+ vrev32.8 W5, tmp3; /* big => little */ \
+ vadd.u32 tmp2, W6, curK; \
+ vst1.32 {tmp0, tmp1}, [RWK]!; \
+ vadd.u32 tmp3, W5, curK; \
+ vst1.32 {tmp2, tmp3}, [RWK]; \
+
+#define WPRECALC_00_15_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vld1.32 {tmp0, tmp1}, [RDATA]!; \
+
+#define WPRECALC_00_15_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ add RWK, sp, #(WK_offs(0)); \
+
+#define WPRECALC_00_15_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vrev32.8 W0, tmp0; /* big => little */ \
+
+#define WPRECALC_00_15_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vld1.32 {tmp2, tmp3}, [RDATA]!; \
+
+#define WPRECALC_00_15_4(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vadd.u32 tmp0, W0, curK; \
+
+#define WPRECALC_00_15_5(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vrev32.8 W7, tmp1; /* big => little */ \
+
+#define WPRECALC_00_15_6(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vrev32.8 W6, tmp2; /* big => little */ \
+
+#define WPRECALC_00_15_7(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vadd.u32 tmp1, W7, curK; \
+
+#define WPRECALC_00_15_8(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vrev32.8 W5, tmp3; /* big => little */ \
+
+#define WPRECALC_00_15_9(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vadd.u32 tmp2, W6, curK; \
+
+#define WPRECALC_00_15_10(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vst1.32 {tmp0, tmp1}, [RWK]!; \
+
+#define WPRECALC_00_15_11(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vadd.u32 tmp3, W5, curK; \
+
+#define WPRECALC_00_15_12(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vst1.32 {tmp2, tmp3}, [RWK]; \
+
+
+/********* Precalc macros for rounds 16-31 ************************************/
+
+#define WPRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ veor tmp0, tmp0; \
+ vext.8 W, W_m16, W_m12, #8; \
+
+#define WPRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ add RWK, sp, #(WK_offs(i)); \
+ vext.8 tmp0, W_m04, tmp0, #4; \
+
+#define WPRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ veor tmp0, tmp0, W_m16; \
+ veor.32 W, W, W_m08; \
+
+#define WPRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ veor tmp1, tmp1; \
+ veor W, W, tmp0; \
+
+#define WPRECALC_16_31_4(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vshl.u32 tmp0, W, #1; \
+
+#define WPRECALC_16_31_5(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vext.8 tmp1, tmp1, W, #(16-12); \
+ vshr.u32 W, W, #31; \
+
+#define WPRECALC_16_31_6(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vorr tmp0, tmp0, W; \
+ vshr.u32 W, tmp1, #30; \
+
+#define WPRECALC_16_31_7(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vshl.u32 tmp1, tmp1, #2; \
+
+#define WPRECALC_16_31_8(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ veor tmp0, tmp0, W; \
+
+#define WPRECALC_16_31_9(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ veor W, tmp0, tmp1; \
+
+#define WPRECALC_16_31_10(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vadd.u32 tmp0, W, curK; \
+
+#define WPRECALC_16_31_11(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vst1.32 {tmp0}, [RWK];
+
+
+/********* Precalc macros for rounds 32-79 ************************************/
+
+#define WPRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ veor W, W_m28; \
+
+#define WPRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vext.8 tmp0, W_m08, W_m04, #8; \
+
+#define WPRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ veor W, W_m16; \
+
+#define WPRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ veor W, tmp0; \
+
+#define WPRECALC_32_79_4(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ add RWK, sp, #(WK_offs(i&~3)); \
+
+#define WPRECALC_32_79_5(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vshl.u32 tmp1, W, #2; \
+
+#define WPRECALC_32_79_6(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vshr.u32 tmp0, W, #30; \
+
+#define WPRECALC_32_79_7(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vorr W, tmp0, tmp1; \
+
+#define WPRECALC_32_79_8(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vadd.u32 tmp0, W, curK; \
+
+#define WPRECALC_32_79_9(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28) \
+ vst1.32 {tmp0}, [RWK];
+
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) veor reg, reg;
+
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * _gcry_sha1_transform_armv7_neon (void *ctx, const unsigned char *data,
+ * size_t nblks)
+ */
+.align 3
+.globl _gcry_sha1_transform_armv7_neon
+.type _gcry_sha1_transform_armv7_neon,%function;
+_gcry_sha1_transform_armv7_neon:
+ /* input:
+ * r0: ctx, CTX
+ * r1: data (64*nblks bytes)
+ * r2: nblks
+ */
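+ /* A 16-word ring buffer of W[t]+K values lives on the re-aligned stack;
+ WK_offs() indexes it modulo 16, so the precalc macros can write the
+ schedule for upcoming rounds while the round macros consume earlier
+ entries through sp/RWK. */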
+
+ cmp RNBLKS, #0;
+ beq .Ldo_nothing;
+
+ push {r4-r12, lr};
+
+ GET_DATA_POINTER(RT3, .LK_VEC, _a);
+ vpush {q4-q7};
+
+ mov ROLDSTACK, sp;
+
+ /* Align stack. */
+ sub sp, #(16*4);
+ and sp, #(~(16-1));
+
+ vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */
+
+ /* Get the values of the chaining variables. */
+ ldm RSTATE, {_a-_e};
+
+ vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */
+
+#undef curK
+#define curK qK1
+ /* Precalc 0-15. */
+ W_PRECALC_00_15();
+
+ b .Loop;
+
+.ltorg
+.Loop:
+ /* Transform 0-15 + Precalc 16-31. */
+ _R( _a, _b, _c, _d, _e, F1, 0, WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16, W4, W5, W6, W7, W0, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F1, 1, WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16, W4, W5, W6, W7, W0, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F1, 2, WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16, W4, W5, W6, W7, W0, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F1, 3, WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16, W4, W5, W6, W7, W0, _, _, _ );
+
+#undef curK
+#define curK qK2
+ _R( _b, _c, _d, _e, _a, F1, 4, WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20, W3, W4, W5, W6, W7, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F1, 5, WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20, W3, W4, W5, W6, W7, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F1, 6, WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20, W3, W4, W5, W6, W7, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F1, 7, WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20, W3, W4, W5, W6, W7, _, _, _ );
+
+ _R( _c, _d, _e, _a, _b, F1, 8, WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24, W2, W3, W4, W5, W6, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F1, 9, WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24, W2, W3, W4, W5, W6, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F1, 10, WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24, W2, W3, W4, W5, W6, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F1, 11, WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24, W2, W3, W4, W5, W6, _, _, _ );
+
+ _R( _d, _e, _a, _b, _c, F1, 12, WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28, W1, W2, W3, W4, W5, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F1, 13, WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28, W1, W2, W3, W4, W5, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F1, 14, WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28, W1, W2, W3, W4, W5, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F1, 15, WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28, W1, W2, W3, W4, W5, _, _, _ );
+
+ /* Transform 16-63 + Precalc 32-79. */
+ _R( _e, _a, _b, _c, _d, F1, 16, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32, W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _d, _e, _a, _b, _c, F1, 17, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32, W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _c, _d, _e, _a, _b, F1, 18, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32, W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _b, _c, _d, _e, _a, F1, 19, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32, W0, W1, W2, W3, W4, W5, W6, W7);
+
+ _R( _a, _b, _c, _d, _e, F2, 20, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36, W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _e, _a, _b, _c, _d, F2, 21, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36, W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _d, _e, _a, _b, _c, F2, 22, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36, W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _c, _d, _e, _a, _b, F2, 23, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36, W7, W0, W1, W2, W3, W4, W5, W6);
+
+#undef curK
+#define curK qK3
+ _R( _b, _c, _d, _e, _a, F2, 24, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40, W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _a, _b, _c, _d, _e, F2, 25, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40, W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _e, _a, _b, _c, _d, F2, 26, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40, W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _d, _e, _a, _b, _c, F2, 27, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40, W6, W7, W0, W1, W2, W3, W4, W5);
+
+ _R( _c, _d, _e, _a, _b, F2, 28, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44, W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _b, _c, _d, _e, _a, F2, 29, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44, W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _a, _b, _c, _d, _e, F2, 30, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44, W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _e, _a, _b, _c, _d, F2, 31, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44, W5, W6, W7, W0, W1, W2, W3, W4);
+
+ _R( _d, _e, _a, _b, _c, F2, 32, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48, W4, W5, W6, W7, W0, W1, W2, W3);
+ _R( _c, _d, _e, _a, _b, F2, 33, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48, W4, W5, W6, W7, W0, W1, W2, W3);
+ _R( _b, _c, _d, _e, _a, F2, 34, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48, W4, W5, W6, W7, W0, W1, W2, W3);
+ _R( _a, _b, _c, _d, _e, F2, 35, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48, W4, W5, W6, W7, W0, W1, W2, W3);
+
+ _R( _e, _a, _b, _c, _d, F2, 36, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52, W3, W4, W5, W6, W7, W0, W1, W2);
+ _R( _d, _e, _a, _b, _c, F2, 37, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52, W3, W4, W5, W6, W7, W0, W1, W2);
+ _R( _c, _d, _e, _a, _b, F2, 38, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52, W3, W4, W5, W6, W7, W0, W1, W2);
+ _R( _b, _c, _d, _e, _a, F2, 39, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52, W3, W4, W5, W6, W7, W0, W1, W2);
+
+ _R( _a, _b, _c, _d, _e, F3, 40, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56, W2, W3, W4, W5, W6, W7, W0, W1);
+ _R( _e, _a, _b, _c, _d, F3, 41, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56, W2, W3, W4, W5, W6, W7, W0, W1);
+ _R( _d, _e, _a, _b, _c, F3, 42, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56, W2, W3, W4, W5, W6, W7, W0, W1);
+ _R( _c, _d, _e, _a, _b, F3, 43, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56, W2, W3, W4, W5, W6, W7, W0, W1);
+
+#undef curK
+#define curK qK4
+ _R( _b, _c, _d, _e, _a, F3, 44, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60, W1, W2, W3, W4, W5, W6, W7, W0);
+ _R( _a, _b, _c, _d, _e, F3, 45, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60, W1, W2, W3, W4, W5, W6, W7, W0);
+ _R( _e, _a, _b, _c, _d, F3, 46, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60, W1, W2, W3, W4, W5, W6, W7, W0);
+ _R( _d, _e, _a, _b, _c, F3, 47, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60, W1, W2, W3, W4, W5, W6, W7, W0);
+
+ _R( _c, _d, _e, _a, _b, F3, 48, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64, W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _b, _c, _d, _e, _a, F3, 49, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64, W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _a, _b, _c, _d, _e, F3, 50, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64, W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _e, _a, _b, _c, _d, F3, 51, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64, W0, W1, W2, W3, W4, W5, W6, W7);
+
+ _R( _d, _e, _a, _b, _c, F3, 52, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68, W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _c, _d, _e, _a, _b, F3, 53, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68, W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _b, _c, _d, _e, _a, F3, 54, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68, W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _a, _b, _c, _d, _e, F3, 55, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68, W7, W0, W1, W2, W3, W4, W5, W6);
+
+ _R( _e, _a, _b, _c, _d, F3, 56, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72, W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _d, _e, _a, _b, _c, F3, 57, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72, W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _c, _d, _e, _a, _b, F3, 58, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72, W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _b, _c, _d, _e, _a, F3, 59, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72, W6, W7, W0, W1, W2, W3, W4, W5);
+
+ subs RNBLKS, #1;
+
+ _R( _a, _b, _c, _d, _e, F4, 60, WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76, W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _e, _a, _b, _c, _d, F4, 61, WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76, W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _d, _e, _a, _b, _c, F4, 62, WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76, W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _c, _d, _e, _a, _b, F4, 63, WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76, W5, W6, W7, W0, W1, W2, W3, W4);
+
+ beq .Lend;
+
+ /* Transform 64-79 + Precalc 0-15 of next block. */
+#undef curK
+#define curK qK1
+ _R( _b, _c, _d, _e, _a, F4, 64, WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F4, 65, WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F4, 66, WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F4, 67, WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+ _R( _c, _d, _e, _a, _b, F4, 68, dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F4, 69, dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F4, 70, WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F4, 71, WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+ _R( _d, _e, _a, _b, _c, F4, 72, dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F4, 73, dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F4, 74, WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F4, 75, WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+ _R( _e, _a, _b, _c, _d, F4, 76, WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F4, 77, WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F4, 78, WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F4, 79, WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ );
+
+ /* Update the chaining variables. */
+ ldm RSTATE, {RT0-RT3};
+ add _a, RT0;
+ ldr RT0, [RSTATE, #state_h4];
+ add _b, RT1;
+ add _c, RT2;
+ add _d, RT3;
+ add _e, RT0;
+ stm RSTATE, {_a-_e};
+
+ b .Loop;
+
+.ltorg
+.Lend:
+	/* Transform 64-79 + Clear NEON registers. */
+ R( _b, _c, _d, _e, _a, F4, 64 );
+ R( _a, _b, _c, _d, _e, F4, 65 ); CLEAR_REG(tmp0);
+ R( _e, _a, _b, _c, _d, F4, 66 ); CLEAR_REG(tmp1);
+ R( _d, _e, _a, _b, _c, F4, 67 ); CLEAR_REG(W0);
+ R( _c, _d, _e, _a, _b, F4, 68 ); CLEAR_REG(W1);
+ R( _b, _c, _d, _e, _a, F4, 69 ); CLEAR_REG(W2);
+ R( _a, _b, _c, _d, _e, F4, 70 ); CLEAR_REG(W3);
+ R( _e, _a, _b, _c, _d, F4, 71 ); CLEAR_REG(W4);
+ R( _d, _e, _a, _b, _c, F4, 72 ); CLEAR_REG(W5);
+ R( _c, _d, _e, _a, _b, F4, 73 ); CLEAR_REG(W6);
+ R( _b, _c, _d, _e, _a, F4, 74 ); CLEAR_REG(W7);
+ R( _a, _b, _c, _d, _e, F4, 75 );
+ R( _e, _a, _b, _c, _d, F4, 76 );
+ R( _d, _e, _a, _b, _c, F4, 77 );
+ R( _c, _d, _e, _a, _b, F4, 78 );
+ R( _b, _c, _d, _e, _a, F4, 79 );
+
+ mov sp, ROLDSTACK;
+
+ /* Update the chaining variables. */
+ ldm RSTATE, {RT0-RT3};
+ add _a, RT0;
+ ldr RT0, [RSTATE, #state_h4];
+ add _b, RT1;
+ add _c, RT2;
+ add _d, RT3;
+ vpop {q4-q7};
+ add _e, RT0;
+ stm RSTATE, {_a-_e};
+
+ /* burn_stack */
+ mov r0, #(16*4 + 16*4 + 15);
+
+ pop {r4-r12, pc};
+
+.Ldo_nothing:
+ mov r0, #0;
+ bx lr
+.size _gcry_sha1_transform_armv7_neon,.-_gcry_sha1_transform_armv7_neon;
+
+#endif
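
For reference, the scalar SHA-1 round logic that the _R/R round macros above interleave with the NEON message-schedule precalculation can be sketched in plain C as follows; this is only an illustrative sketch (names are not taken from libgcrypt) and it assumes the 80-word schedule w[] has already been expanded:

#include <stdint.h>

static inline uint32_t rol32 (uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

/* One 64-byte block.  F1/K1 cover rounds 0-19, F2/K2 rounds 20-39,
   F3/K3 rounds 40-59 and F4/K4 rounds 60-79, matching the F1..F4 and
   qK1..qK4 groups used by the macros above. */
static void
sha1_block_sketch (uint32_t h[5], const uint32_t w[80])
{
  uint32_t a = h[0], b = h[1], c = h[2], d = h[3], e = h[4];
  int i;

  for (i = 0; i < 80; i++)
    {
      uint32_t f, k, t;
      if (i < 20)      { f = d ^ (b & (c ^ d));       k = 0x5A827999; } /* F1 */
      else if (i < 40) { f = b ^ c ^ d;               k = 0x6ED9EBA1; } /* F2 */
      else if (i < 60) { f = (b & c) | (d & (b ^ c)); k = 0x8F1BBCDC; } /* F3 */
      else             { f = b ^ c ^ d;               k = 0xCA62C1D6; } /* F4 */
      t = rol32 (a, 5) + f + e + k + w[i];
      e = d; d = c; c = rol32 (b, 30); b = a; a = t;
    }

  /* "Update the chaining variables": the same effect as the ldm/add/stm
     sequence at the end of the assembly loop above. */
  h[0] += a; h[1] += b; h[2] += c; h[3] += d; h[4] += e;
}
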
diff --git a/comm/third_party/libgcrypt/cipher/sha1-armv8-aarch32-ce.S b/comm/third_party/libgcrypt/cipher/sha1-armv8-aarch32-ce.S
new file mode 100644
index 0000000000..bf2b233b01
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha1-armv8-aarch32-ce.S
@@ -0,0 +1,220 @@
+/* sha1-armv8-aarch32-ce.S - ARM/CE accelerated SHA-1 transform function
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) && defined(USE_SHA1)
+
+.syntax unified
+.arch armv8-a
+.fpu crypto-neon-fp-armv8
+.arm
+
+.text
+
+#ifdef __PIC__
+# define GET_DATA_POINTER(reg, name, rtmp) \
+ ldr reg, 1f; \
+ ldr rtmp, 2f; \
+ b 3f; \
+ 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+ 2: .word name(GOT); \
+ 3: add reg, pc, reg; \
+ ldr reg, [reg, rtmp];
+#else
+# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+
+/* Constants */
+
+#define K1 0x5A827999
+#define K2 0x6ED9EBA1
+#define K3 0x8F1BBCDC
+#define K4 0xCA62C1D6
+.align 4
+gcry_sha1_aarch32_ce_K_VEC:
+.LK_VEC:
+.LK1: .long K1, K1, K1, K1
+.LK2: .long K2, K2, K2, K2
+.LK3: .long K3, K3, K3, K3
+.LK4: .long K4, K4, K4, K4
+
+
+/* Register macros */
+
+#define qH4 q0
+#define sH4 s0
+#define qH0123 q1
+
+#define qABCD q2
+#define qE0 q3
+#define qE1 q4
+
+#define qT0 q5
+#define qT1 q6
+
+#define qW0 q8
+#define qW1 q9
+#define qW2 q10
+#define qW3 q11
+
+#define qK1 q12
+#define qK2 q13
+#define qK3 q14
+#define qK4 q15
+
+
+/* Round macros */
+
+#define _(...) /*_*/
+#define do_add(dst, src0, src1) vadd.u32 dst, src0, src1;
+#define do_sha1su0(w0,w1,w2) sha1su0.32 w0,w1,w2;
+#define do_sha1su1(w0,w3) sha1su1.32 w0,w3;
+
+#define do_rounds(f, e0, e1, t, k, w0, w1, w2, w3, add_fn, sha1su0_fn, sha1su1_fn) \
+ sha1su1_fn( w3, w2 ); \
+ sha1h.32 e0, qABCD; \
+ sha1##f.32 qABCD, e1, t; \
+ add_fn( t, w2, k ); \
+ sha1su0_fn( w0, w1, w2 );
+
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) veor reg, reg;
+
+
+/*
+ * unsigned int
+ * _gcry_sha1_transform_armv8_ce (void *ctx, const unsigned char *data,
+ * size_t nblks)
+ */
+.align 3
+.globl _gcry_sha1_transform_armv8_ce
+.type _gcry_sha1_transform_armv8_ce,%function;
+_gcry_sha1_transform_armv8_ce:
+ /* input:
+ * r0: ctx, CTX
+ * r1: data (64*nblks bytes)
+ * r2: nblks
+ */
+
+ cmp r2, #0;
+ push {r4,lr};
+ beq .Ldo_nothing;
+
+ vpush {q4-q7};
+
+ GET_DATA_POINTER(r4, .LK_VEC, lr);
+
+ veor qH4, qH4
+ vld1.32 {qH0123}, [r0] /* load h0,h1,h2,h3 */
+
+ vld1.32 {qK1-qK2}, [r4]! /* load K1,K2 */
+ vldr sH4, [r0, #16] /* load h4 */
+ vld1.32 {qK3-qK4}, [r4] /* load K3,K4 */
+
+ vld1.8 {qW0-qW1}, [r1]!
+ vmov qABCD, qH0123
+ vld1.8 {qW2-qW3}, [r1]!
+
+ vrev32.8 qW0, qW0
+ vrev32.8 qW1, qW1
+ vrev32.8 qW2, qW2
+ do_add(qT0, qW0, qK1)
+ vrev32.8 qW3, qW3
+ do_add(qT1, qW1, qK1)
+
+.Loop:
+ do_rounds(c, qE1, qH4, qT0, qK1, qW0, qW1, qW2, qW3, do_add, do_sha1su0, _)
+ subs r2, r2, #1
+ do_rounds(c, qE0, qE1, qT1, qK1, qW1, qW2, qW3, qW0, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(c, qE1, qE0, qT0, qK1, qW2, qW3, qW0, qW1, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(c, qE0, qE1, qT1, qK2, qW3, qW0, qW1, qW2, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(c, qE1, qE0, qT0, qK2, qW0, qW1, qW2, qW3, do_add, do_sha1su0, do_sha1su1)
+
+ do_rounds(p, qE0, qE1, qT1, qK2, qW1, qW2, qW3, qW0, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(p, qE1, qE0, qT0, qK2, qW2, qW3, qW0, qW1, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(p, qE0, qE1, qT1, qK2, qW3, qW0, qW1, qW2, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(p, qE1, qE0, qT0, qK3, qW0, qW1, qW2, qW3, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(p, qE0, qE1, qT1, qK3, qW1, qW2, qW3, qW0, do_add, do_sha1su0, do_sha1su1)
+
+ do_rounds(m, qE1, qE0, qT0, qK3, qW2, qW3, qW0, qW1, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(m, qE0, qE1, qT1, qK3, qW3, qW0, qW1, qW2, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(m, qE1, qE0, qT0, qK3, qW0, qW1, qW2, qW3, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(m, qE0, qE1, qT1, qK4, qW1, qW2, qW3, qW0, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(m, qE1, qE0, qT0, qK4, qW2, qW3, qW0, qW1, do_add, do_sha1su0, do_sha1su1)
+
+ do_rounds(p, qE0, qE1, qT1, qK4, qW3, qW0, qW1, qW2, do_add, do_sha1su0, do_sha1su1)
+ beq .Lend
+
+ vld1.8 {qW0-qW1}, [r1]! /* preload */
+ do_rounds(p, qE1, qE0, qT0, qK4, _ , _ , qW2, qW3, do_add, _, do_sha1su1)
+ vrev32.8 qW0, qW0
+ vld1.8 {qW2}, [r1]!
+ vrev32.8 qW1, qW1
+ do_rounds(p, qE0, qE1, qT1, qK4, _ , _ , qW3, _ , do_add, _, _)
+ vld1.8 {qW3}, [r1]!
+ vrev32.8 qW2, qW2
+ do_rounds(p, qE1, qE0, qT0, _, _, _, _, _, _, _, _)
+ vrev32.8 qW3, qW3
+ do_rounds(p, qE0, qE1, qT1, _, _, _, _, _, _, _, _)
+
+ do_add(qT0, qW0, qK1)
+ vadd.u32 qH4, qE0
+ vadd.u32 qABCD, qH0123
+ do_add(qT1, qW1, qK1)
+
+ vmov qH0123, qABCD
+
+ b .Loop
+
+.Lend:
+ do_rounds(p, qE1, qE0, qT0, qK4, _ , _ , qW2, qW3, do_add, _, do_sha1su1)
+ do_rounds(p, qE0, qE1, qT1, qK4, _ , _ , qW3, _ , do_add, _, _)
+ do_rounds(p, qE1, qE0, qT0, _, _, _, _, _, _, _, _)
+ do_rounds(p, qE0, qE1, qT1, _, _, _, _, _, _, _, _)
+
+ vadd.u32 qH4, qE0
+ vadd.u32 qH0123, qABCD
+
+ CLEAR_REG(qW0)
+ CLEAR_REG(qW1)
+ CLEAR_REG(qW2)
+ CLEAR_REG(qW3)
+ CLEAR_REG(qABCD)
+ CLEAR_REG(qE1)
+ CLEAR_REG(qE0)
+
+ vstr sH4, [r0, #16] /* store h4 */
+ vst1.32 {qH0123}, [r0] /* store h0,h1,h2,h3 */
+
+ CLEAR_REG(qH0123)
+ CLEAR_REG(qH4)
+ vpop {q4-q7}
+
+.Ldo_nothing:
+ mov r0, #0
+ pop {r4,pc}
+.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce;
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/sha1-armv8-aarch64-ce.S b/comm/third_party/libgcrypt/cipher/sha1-armv8-aarch64-ce.S
new file mode 100644
index 0000000000..223268cad2
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha1-armv8-aarch64-ce.S
@@ -0,0 +1,201 @@
+/* sha1-armv8-aarch64-ce.S - ARM/CE accelerated SHA-1 transform function
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__) && \
+ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) && defined(USE_SHA1)
+
+.cpu generic+simd+crypto
+
+.text
+
+
+/* Constants */
+
+#define K1 0x5A827999
+#define K2 0x6ED9EBA1
+#define K3 0x8F1BBCDC
+#define K4 0xCA62C1D6
+.align 4
+gcry_sha1_aarch64_ce_K_VEC:
+.LK_VEC:
+.LK1: .long K1, K1, K1, K1
+.LK2: .long K2, K2, K2, K2
+.LK3: .long K3, K3, K3, K3
+.LK4: .long K4, K4, K4, K4
+
+
+/* Register macros */
+
+#define sH4 s0
+#define vH4 v0
+#define vH0123 v1
+
+#define qABCD q2
+#define sABCD s2
+#define vABCD v2
+#define sE0 s3
+#define vE0 v3
+#define sE1 s4
+#define vE1 v4
+
+#define vT0 v5
+#define vT1 v6
+
+#define vW0 v16
+#define vW1 v17
+#define vW2 v18
+#define vW3 v19
+
+#define vK1 v20
+#define vK2 v21
+#define vK3 v22
+#define vK4 v23
+
+
+/* Round macros */
+
+#define _(...) /*_*/
+#define do_add(dst, src0, src1) add dst.4s, src0.4s, src1.4s;
+#define do_sha1su0(w0,w1,w2) sha1su0 w0.4s,w1.4s,w2.4s;
+#define do_sha1su1(w0,w3) sha1su1 w0.4s,w3.4s;
+
+#define do_rounds(f, e0, e1, t, k, w0, w1, w2, w3, add_fn, sha1su0_fn, sha1su1_fn) \
+ sha1su1_fn( v##w3, v##w2 ); \
+ sha1h e0, sABCD; \
+ sha1##f qABCD, e1, v##t.4s; \
+ add_fn( v##t, v##w2, v##k ); \
+ sha1su0_fn( v##w0, v##w1, v##w2 );
+
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b;
+
+
+/*
+ * unsigned int
+ * _gcry_sha1_transform_armv8_ce (void *ctx, const unsigned char *data,
+ * size_t nblks)
+ */
+.align 3
+.globl _gcry_sha1_transform_armv8_ce
+ELF(.type _gcry_sha1_transform_armv8_ce,%function;)
+_gcry_sha1_transform_armv8_ce:
+ /* input:
+ * x0: ctx, CTX
+ * x1: data (64*nblks bytes)
+ * x2: nblks
+ */
+ CFI_STARTPROC();
+
+ cbz x2, .Ldo_nothing;
+
+ GET_DATA_POINTER(x4, .LK_VEC);
+
+ ld1 {vH0123.4s}, [x0] /* load h0,h1,h2,h3 */
+ ld1 {vK1.4s-vK4.4s}, [x4] /* load K1,K2,K3,K4 */
+ ldr sH4, [x0, #16] /* load h4 */
+
+ ld1 {vW0.16b-vW3.16b}, [x1], #64
+ mov vABCD.16b, vH0123.16b
+
+ rev32 vW0.16b, vW0.16b
+ rev32 vW1.16b, vW1.16b
+ rev32 vW2.16b, vW2.16b
+ do_add(vT0, vW0, vK1)
+ rev32 vW3.16b, vW3.16b
+ do_add(vT1, vW1, vK1)
+
+.Loop:
+ do_rounds(c, sE1, sH4, T0, K1, W0, W1, W2, W3, do_add, do_sha1su0, _)
+ sub x2, x2, #1
+ do_rounds(c, sE0, sE1, T1, K1, W1, W2, W3, W0, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(c, sE1, sE0, T0, K1, W2, W3, W0, W1, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(c, sE0, sE1, T1, K2, W3, W0, W1, W2, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(c, sE1, sE0, T0, K2, W0, W1, W2, W3, do_add, do_sha1su0, do_sha1su1)
+
+ do_rounds(p, sE0, sE1, T1, K2, W1, W2, W3, W0, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(p, sE1, sE0, T0, K2, W2, W3, W0, W1, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(p, sE0, sE1, T1, K2, W3, W0, W1, W2, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(p, sE1, sE0, T0, K3, W0, W1, W2, W3, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(p, sE0, sE1, T1, K3, W1, W2, W3, W0, do_add, do_sha1su0, do_sha1su1)
+
+ do_rounds(m, sE1, sE0, T0, K3, W2, W3, W0, W1, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(m, sE0, sE1, T1, K3, W3, W0, W1, W2, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(m, sE1, sE0, T0, K3, W0, W1, W2, W3, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(m, sE0, sE1, T1, K4, W1, W2, W3, W0, do_add, do_sha1su0, do_sha1su1)
+ do_rounds(m, sE1, sE0, T0, K4, W2, W3, W0, W1, do_add, do_sha1su0, do_sha1su1)
+
+ do_rounds(p, sE0, sE1, T1, K4, W3, W0, W1, W2, do_add, do_sha1su0, do_sha1su1)
+ cbz x2, .Lend
+
+ ld1 {vW0.16b-vW1.16b}, [x1], #32 /* preload */
+ do_rounds(p, sE1, sE0, T0, K4, _ , _ , W2, W3, do_add, _, do_sha1su1)
+ rev32 vW0.16b, vW0.16b
+ ld1 {vW2.16b}, [x1], #16
+ rev32 vW1.16b, vW1.16b
+ do_rounds(p, sE0, sE1, T1, K4, _ , _ , W3, _ , do_add, _, _)
+ ld1 {vW3.16b}, [x1], #16
+ rev32 vW2.16b, vW2.16b
+ do_rounds(p, sE1, sE0, T0, _, _, _, _, _, _, _, _)
+ rev32 vW3.16b, vW3.16b
+ do_rounds(p, sE0, sE1, T1, _, _, _, _, _, _, _, _)
+
+ do_add(vT0, vW0, vK1)
+ add vH4.2s, vH4.2s, vE0.2s
+ add vABCD.4s, vABCD.4s, vH0123.4s
+ do_add(vT1, vW1, vK1)
+
+ mov vH0123.16b, vABCD.16b
+
+ b .Loop
+
+.Lend:
+ do_rounds(p, sE1, sE0, T0, K4, _ , _ , W2, W3, do_add, _, do_sha1su1)
+ do_rounds(p, sE0, sE1, T1, K4, _ , _ , W3, _ , do_add, _, _)
+ do_rounds(p, sE1, sE0, T0, _, _, _, _, _, _, _, _)
+ do_rounds(p, sE0, sE1, T1, _, _, _, _, _, _, _, _)
+
+ add vH4.2s, vH4.2s, vE0.2s
+ add vH0123.4s, vH0123.4s, vABCD.4s
+
+ CLEAR_REG(vW0)
+ CLEAR_REG(vW1)
+ CLEAR_REG(vW2)
+ CLEAR_REG(vW3)
+ CLEAR_REG(vABCD)
+ CLEAR_REG(vE1)
+ CLEAR_REG(vE0)
+
+ str sH4, [x0, #16] /* store h4 */
+ st1 {vH0123.4s}, [x0] /* store h0,h1,h2,h3 */
+
+ CLEAR_REG(vH0123)
+ CLEAR_REG(vH4)
+
+.Ldo_nothing:
+ mov x0, #0
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce;)
+
+#endif
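
The do_rounds macro used by both Crypto Extensions files maps directly onto the ACLE SHA-1 intrinsics from <arm_neon.h>. A hedged sketch of a single invocation (four rounds plus one schedule update and one W+K precompute), with illustrative names and assuming a target built with the ARMv8 SHA-1 crypto extension:

#include <arm_neon.h>  /* assumes a target with the ARMv8 SHA-1 extension */

/* Roughly one do_rounds(c, ...) step.  For the 'p' and 'm' round groups
   the vsha1cq_u32 call is replaced by vsha1pq_u32 or vsha1mq_u32, just as
   the macro substitutes sha1p/sha1m for sha1c. */
static void
sha1_ce_step_sketch (uint32x4_t *abcd, uint32_t *e0, uint32_t e1,
                     uint32x4_t *w0, uint32x4_t w1, uint32x4_t w2,
                     uint32x4_t *w3, uint32x4_t *wk, uint32x4_t k)
{
  *w3 = vsha1su1q_u32 (*w3, w2);                 /* sha1su1_fn          */
  *e0 = vsha1h_u32 (vgetq_lane_u32 (*abcd, 0));  /* sha1h  e0, sABCD    */
  *abcd = vsha1cq_u32 (*abcd, e1, *wk);          /* sha1c  qABCD, e1, t */
  *wk = vaddq_u32 (w2, k);                       /* add_fn: next W+K    */
  *w0 = vsha1su0q_u32 (*w0, w1, w2);             /* sha1su0_fn          */
}
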
diff --git a/comm/third_party/libgcrypt/cipher/sha1-avx-amd64.S b/comm/third_party/libgcrypt/cipher/sha1-avx-amd64.S
new file mode 100644
index 0000000000..85876ad418
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha1-avx-amd64.S
@@ -0,0 +1,429 @@
+/* sha1-avx-amd64.S - Intel AVX accelerated SHA-1 transform function
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Based on sha1.c:
+ * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Intel AVX accelerated SHA-1 implementation, based on the SSSE3 white paper:
+ * "Improving the Performance of the Secure Hash Algorithm (SHA-1)"
+ * http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
+ */
+
+#ifdef __x86_64__
+#include <config.h>
+
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1)
+
+#include "asm-common-amd64.h"
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+.text
+#define K1 0x5A827999
+#define K2 0x6ED9EBA1
+#define K3 0x8F1BBCDC
+#define K4 0xCA62C1D6
+.align 16
+.LK_XMM:
+.LK1: .long K1, K1, K1, K1
+.LK2: .long K2, K2, K2, K2
+.LK3: .long K3, K3, K3, K3
+.LK4: .long K4, K4, K4, K4
+
+.Lbswap_shufb_ctl:
+ .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+
+
+/* Register macros */
+
+#define RSTATE %r8
+#define RDATA %r9
+#define ROLDSTACK %r10
+#define RNBLKS %r11
+
+#define a %eax
+#define b %ebx
+#define c %ecx
+#define d %edx
+#define e %edi
+
+#define RT0 %esi
+#define RT1 %ebp
+
+#define Wtmp0 %xmm0
+#define Wtmp1 %xmm1
+
+#define W0 %xmm2
+#define W1 %xmm3
+#define W2 %xmm4
+#define W3 %xmm5
+#define W4 %xmm6
+#define W5 %xmm7
+#define W6 %xmm8
+#define W7 %xmm9
+
+#define BSWAP_REG %xmm10
+
+
+/* Round function macros. */
+
+#define WK(i) (((i) & 15) * 4)(%rsp)
+
+#define R_F1(a,b,c,d,e,i) \
+ movl c, RT0; \
+ addl WK(i), e; \
+ xorl d, RT0; \
+ movl a, RT1; \
+ andl b, RT0; \
+ shldl $30, b, b; \
+ xorl d, RT0; \
+ leal (RT0,e), e; \
+ shldl $5, RT1, RT1; \
+ addl RT1, e;
+
+#define R_F2(a,b,c,d,e,i) \
+ movl c, RT0; \
+ addl WK(i), e; \
+ xorl b, RT0; \
+ shldl $30, b, b; \
+ xorl d, RT0; \
+ movl a, RT1; \
+ leal (RT0,e), e; \
+ shldl $5, RT1, RT1; \
+ addl RT1, e;
+
+#define R_F3(a,b,c,d,e,i) \
+ movl c, RT0; \
+ movl b, RT1; \
+ xorl b, RT0; \
+ andl c, RT1; \
+ andl d, RT0; \
+ addl RT1, e; \
+ addl WK(i), e; \
+ shldl $30, b, b; \
+ movl a, RT1; \
+ leal (RT0,e), e; \
+ shldl $5, RT1, RT1; \
+ addl RT1, e;
+
+#define R_F4(a,b,c,d,e,i) R_F2(a,b,c,d,e,i)
+
+#define R(a,b,c,d,e,f,i) \
+ R_##f(a,b,c,d,e,i)
+
+
+/* Input expansion macros. */
+
+#define W_PRECALC_00_15_0(i, W, tmp0) \
+ vmovdqu (4*(i))(RDATA), tmp0;
+
+#define W_PRECALC_00_15_1(i, W, tmp0) \
+ vpshufb BSWAP_REG, tmp0, W;
+
+#define W_PRECALC_00_15_2(i, W, tmp0) \
+ vpaddd (.LK_XMM + ((i)/20)*16) rRIP, W, tmp0;
+
+#define W_PRECALC_00_15_3(i, W, tmp0) \
+ vmovdqa tmp0, WK(i&~3);
+
+#define W_PRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ vpalignr $8, W_m16, W_m12, W; \
+ vpsrldq $4, W_m04, tmp0; \
+ vpxor W_m08, W, W;
+
+#define W_PRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ vpxor W_m16, tmp0, tmp0; \
+ vpxor tmp0, W, W; \
+ vpslld $1, W, tmp0; \
+ vpslldq $12, W, tmp1; \
+ vpsrld $31, W, W;
+
+#define W_PRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ vpor W, tmp0, tmp0; \
+ vpsrld $30, tmp1, W; \
+ vpslld $2, tmp1, tmp1;
+
+#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ vpxor W, tmp0, tmp0; \
+ vpxor tmp1, tmp0, W; \
+ vpaddd (.LK_XMM + ((i)/20)*16) rRIP, W, tmp0; \
+ vmovdqa tmp0, WK((i)&~3);
+
+#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ vpxor W_m28, W, W; \
+ vpalignr $8, W_m08, W_m04, tmp0;
+
+#define W_PRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ vpxor W_m16, W, W; \
+ vpxor tmp0, W, W;
+
+#define W_PRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ vpsrld $30, W, tmp0; \
+ vpslld $2, W, W;
+
+#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ vpor W, tmp0, W; \
+ vpaddd (.LK_XMM + ((i)/20)*16) rRIP, W, tmp0; \
+ vmovdqa tmp0, WK((i)&~3);
+
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * _gcry_sha1_transform_amd64_avx (void *ctx, const unsigned char *data,
+ * size_t nblks)
+ */
+.globl _gcry_sha1_transform_amd64_avx
+ELF(.type _gcry_sha1_transform_amd64_avx,@function)
+.align 16
+_gcry_sha1_transform_amd64_avx:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: data (64*nblks bytes)
+ * %rdx: nblks
+ */
+ CFI_STARTPROC();
+
+ xorl %eax, %eax;
+ cmpq $0, %rdx;
+ jz .Lret;
+
+ vzeroupper;
+
+ movq %rdx, RNBLKS;
+ movq %rdi, RSTATE;
+ movq %rsi, RDATA;
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+
+ movq %rsp, ROLDSTACK;
+ CFI_DEF_CFA_REGISTER(ROLDSTACK);
+
+ subq $(16*4), %rsp;
+ andq $(~31), %rsp;
+
+ /* Get the values of the chaining variables. */
+ movl state_h0(RSTATE), a;
+ movl state_h1(RSTATE), b;
+ movl state_h2(RSTATE), c;
+ movl state_h3(RSTATE), d;
+ movl state_h4(RSTATE), e;
+
+ vmovdqa .Lbswap_shufb_ctl rRIP, BSWAP_REG;
+
+ /* Precalc 0-15. */
+ W_PRECALC_00_15_0(0, W0, Wtmp0);
+ W_PRECALC_00_15_1(1, W0, Wtmp0);
+ W_PRECALC_00_15_2(2, W0, Wtmp0);
+ W_PRECALC_00_15_3(3, W0, Wtmp0);
+ W_PRECALC_00_15_0(4, W7, Wtmp0);
+ W_PRECALC_00_15_1(5, W7, Wtmp0);
+ W_PRECALC_00_15_2(6, W7, Wtmp0);
+ W_PRECALC_00_15_3(7, W7, Wtmp0);
+ W_PRECALC_00_15_0(8, W6, Wtmp0);
+ W_PRECALC_00_15_1(9, W6, Wtmp0);
+ W_PRECALC_00_15_2(10, W6, Wtmp0);
+ W_PRECALC_00_15_3(11, W6, Wtmp0);
+ W_PRECALC_00_15_0(12, W5, Wtmp0);
+ W_PRECALC_00_15_1(13, W5, Wtmp0);
+ W_PRECALC_00_15_2(14, W5, Wtmp0);
+ W_PRECALC_00_15_3(15, W5, Wtmp0);
+
+.align 8
+.Loop:
+ addq $64, RDATA;
+
+ /* Transform 0-15 + Precalc 16-31. */
+ R( a, b, c, d, e, F1, 0 ); W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( e, a, b, c, d, F1, 1 ); W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( d, e, a, b, c, F1, 2 ); W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( c, d, e, a, b, F1, 3 ); W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( b, c, d, e, a, F1, 4 ); W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( a, b, c, d, e, F1, 5 ); W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( e, a, b, c, d, F1, 6 ); W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( d, e, a, b, c, F1, 7 ); W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( c, d, e, a, b, F1, 8 ); W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( b, c, d, e, a, F1, 9 ); W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( a, b, c, d, e, F1, 10 ); W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( e, a, b, c, d, F1, 11 ); W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( d, e, a, b, c, F1, 12 ); W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ R( c, d, e, a, b, F1, 13 ); W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ R( b, c, d, e, a, F1, 14 ); W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ R( a, b, c, d, e, F1, 15 ); W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+
+ /* Transform 16-63 + Precalc 32-79. */
+ R( e, a, b, c, d, F1, 16 ); W_PRECALC_32_79_0(32, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( d, e, a, b, c, F1, 17 ); W_PRECALC_32_79_1(33, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( c, d, e, a, b, F1, 18 ); W_PRECALC_32_79_2(34, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( b, c, d, e, a, F1, 19 ); W_PRECALC_32_79_3(35, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( a, b, c, d, e, F2, 20 ); W_PRECALC_32_79_0(36, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( e, a, b, c, d, F2, 21 ); W_PRECALC_32_79_1(37, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( d, e, a, b, c, F2, 22 ); W_PRECALC_32_79_2(38, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( c, d, e, a, b, F2, 23 ); W_PRECALC_32_79_3(39, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( b, c, d, e, a, F2, 24 ); W_PRECALC_32_79_0(40, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( a, b, c, d, e, F2, 25 ); W_PRECALC_32_79_1(41, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( e, a, b, c, d, F2, 26 ); W_PRECALC_32_79_2(42, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( d, e, a, b, c, F2, 27 ); W_PRECALC_32_79_3(43, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( c, d, e, a, b, F2, 28 ); W_PRECALC_32_79_0(44, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( b, c, d, e, a, F2, 29 ); W_PRECALC_32_79_1(45, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( a, b, c, d, e, F2, 30 ); W_PRECALC_32_79_2(46, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( e, a, b, c, d, F2, 31 ); W_PRECALC_32_79_3(47, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( d, e, a, b, c, F2, 32 ); W_PRECALC_32_79_0(48, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( c, d, e, a, b, F2, 33 ); W_PRECALC_32_79_1(49, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( b, c, d, e, a, F2, 34 ); W_PRECALC_32_79_2(50, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( a, b, c, d, e, F2, 35 ); W_PRECALC_32_79_3(51, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( e, a, b, c, d, F2, 36 ); W_PRECALC_32_79_0(52, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( d, e, a, b, c, F2, 37 ); W_PRECALC_32_79_1(53, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( c, d, e, a, b, F2, 38 ); W_PRECALC_32_79_2(54, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( b, c, d, e, a, F2, 39 ); W_PRECALC_32_79_3(55, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( a, b, c, d, e, F3, 40 ); W_PRECALC_32_79_0(56, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( e, a, b, c, d, F3, 41 ); W_PRECALC_32_79_1(57, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( d, e, a, b, c, F3, 42 ); W_PRECALC_32_79_2(58, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( c, d, e, a, b, F3, 43 ); W_PRECALC_32_79_3(59, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( b, c, d, e, a, F3, 44 ); W_PRECALC_32_79_0(60, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( a, b, c, d, e, F3, 45 ); W_PRECALC_32_79_1(61, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( e, a, b, c, d, F3, 46 ); W_PRECALC_32_79_2(62, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( d, e, a, b, c, F3, 47 ); W_PRECALC_32_79_3(63, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( c, d, e, a, b, F3, 48 ); W_PRECALC_32_79_0(64, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( b, c, d, e, a, F3, 49 ); W_PRECALC_32_79_1(65, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( a, b, c, d, e, F3, 50 ); W_PRECALC_32_79_2(66, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( e, a, b, c, d, F3, 51 ); W_PRECALC_32_79_3(67, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( d, e, a, b, c, F3, 52 ); W_PRECALC_32_79_0(68, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( c, d, e, a, b, F3, 53 ); W_PRECALC_32_79_1(69, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( b, c, d, e, a, F3, 54 ); W_PRECALC_32_79_2(70, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( a, b, c, d, e, F3, 55 ); W_PRECALC_32_79_3(71, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( e, a, b, c, d, F3, 56 ); W_PRECALC_32_79_0(72, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( d, e, a, b, c, F3, 57 ); W_PRECALC_32_79_1(73, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( c, d, e, a, b, F3, 58 ); W_PRECALC_32_79_2(74, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( b, c, d, e, a, F3, 59 ); W_PRECALC_32_79_3(75, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( a, b, c, d, e, F4, 60 ); W_PRECALC_32_79_0(76, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( e, a, b, c, d, F4, 61 ); W_PRECALC_32_79_1(77, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( d, e, a, b, c, F4, 62 ); W_PRECALC_32_79_2(78, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( c, d, e, a, b, F4, 63 ); W_PRECALC_32_79_3(79, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+
+ decq RNBLKS;
+ jz .Lend;
+
+ /* Transform 64-79 + Precalc 0-15 of next block. */
+ R( b, c, d, e, a, F4, 64 ); W_PRECALC_00_15_0(0, W0, Wtmp0);
+ R( a, b, c, d, e, F4, 65 ); W_PRECALC_00_15_1(1, W0, Wtmp0);
+ R( e, a, b, c, d, F4, 66 ); W_PRECALC_00_15_2(2, W0, Wtmp0);
+ R( d, e, a, b, c, F4, 67 ); W_PRECALC_00_15_3(3, W0, Wtmp0);
+ R( c, d, e, a, b, F4, 68 ); W_PRECALC_00_15_0(4, W7, Wtmp0);
+ R( b, c, d, e, a, F4, 69 ); W_PRECALC_00_15_1(5, W7, Wtmp0);
+ R( a, b, c, d, e, F4, 70 ); W_PRECALC_00_15_2(6, W7, Wtmp0);
+ R( e, a, b, c, d, F4, 71 ); W_PRECALC_00_15_3(7, W7, Wtmp0);
+ R( d, e, a, b, c, F4, 72 ); W_PRECALC_00_15_0(8, W6, Wtmp0);
+ R( c, d, e, a, b, F4, 73 ); W_PRECALC_00_15_1(9, W6, Wtmp0);
+ R( b, c, d, e, a, F4, 74 ); W_PRECALC_00_15_2(10, W6, Wtmp0);
+ R( a, b, c, d, e, F4, 75 ); W_PRECALC_00_15_3(11, W6, Wtmp0);
+ R( e, a, b, c, d, F4, 76 ); W_PRECALC_00_15_0(12, W5, Wtmp0);
+ R( d, e, a, b, c, F4, 77 ); W_PRECALC_00_15_1(13, W5, Wtmp0);
+ R( c, d, e, a, b, F4, 78 );
+ addl state_h0(RSTATE), a; W_PRECALC_00_15_2(14, W5, Wtmp0);
+ R( b, c, d, e, a, F4, 79 ); W_PRECALC_00_15_3(15, W5, Wtmp0);
+
+ /* Update the chaining variables. */
+ addl state_h3(RSTATE), d;
+ addl state_h2(RSTATE), c;
+ addl state_h1(RSTATE), b;
+ addl state_h4(RSTATE), e;
+
+ movl d, state_h3(RSTATE);
+ movl c, state_h2(RSTATE);
+ movl b, state_h1(RSTATE);
+ movl a, state_h0(RSTATE);
+ movl e, state_h4(RSTATE);
+
+ jmp .Loop;
+
+.align 16
+.Lend:
+ vzeroall;
+
+ /* Transform 64-79 + burn stack */
+ R( b, c, d, e, a, F4, 64 );
+ R( a, b, c, d, e, F4, 65 );
+ R( e, a, b, c, d, F4, 66 );
+ R( d, e, a, b, c, F4, 67 );
+ R( c, d, e, a, b, F4, 68 );
+ R( b, c, d, e, a, F4, 69 );
+ R( a, b, c, d, e, F4, 70 );
+ R( e, a, b, c, d, F4, 71 );
+ R( d, e, a, b, c, F4, 72 );
+ R( c, d, e, a, b, F4, 73 );
+ R( b, c, d, e, a, F4, 74 );
+ R( a, b, c, d, e, F4, 75 );
+ R( e, a, b, c, d, F4, 76 ); vmovdqa %xmm0, (0*16)(%rsp);
+ R( d, e, a, b, c, F4, 77 ); vmovdqa %xmm0, (1*16)(%rsp);
+ R( c, d, e, a, b, F4, 78 ); vmovdqa %xmm0, (2*16)(%rsp);
+ addl state_h0(RSTATE), a;
+ R( b, c, d, e, a, F4, 79 );
+
+ /* 16*4/16-1 = 3 */
+ vmovdqa %xmm0, (3*16)(%rsp);
+
+ /* Update the chaining variables. */
+ addl state_h3(RSTATE), d;
+ addl state_h2(RSTATE), c;
+ addl state_h1(RSTATE), b;
+ addl state_h4(RSTATE), e;
+
+ movl d, state_h3(RSTATE);
+ movl c, state_h2(RSTATE);
+ movl b, state_h1(RSTATE);
+ movl a, state_h0(RSTATE);
+ movl e, state_h4(RSTATE);
+
+ movq ROLDSTACK, %rsp;
+ CFI_REGISTER(ROLDSTACK, %rsp);
+ CFI_DEF_CFA_REGISTER(%rsp);
+
+ popq %rbp;
+ CFI_POP(%rbp);
+ popq %rbx;
+ CFI_POP(%rbx);
+
+ /* stack already burned */
+ xorl %eax, %eax;
+
+.Lret:
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sha1_transform_amd64_avx,
+ .-_gcry_sha1_transform_amd64_avx;)
+
+#endif
+#endif
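
The W_PRECALC_16_31_* and W_PRECALC_32_79_* macros above compute the SHA-1 message expansion four words at a time and park W+K on the stack in the 16-entry WK(i) ring. A scalar reference sketch of what they compute (illustrative names, plain C):

#include <stdint.h>

static inline uint32_t rol32 (uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

/* For i >= 32 the macros use the equivalent form
   w[i] = rol32 (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32], 2)
   so that four consecutive words can be produced by one vector step
   without an intra-vector dependency; the result is the same. */
static void
sha1_expand_sketch (uint32_t w[80], uint32_t wk[80])
{
  static const uint32_t k[4] =
    { 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6 };
  int i;

  for (i = 16; i < 80; i++)
    {
      w[i] = rol32 (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1);
      wk[i] = w[i] + k[i / 20];
    }
}
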
diff --git a/comm/third_party/libgcrypt/cipher/sha1-avx-bmi2-amd64.S b/comm/third_party/libgcrypt/cipher/sha1-avx-bmi2-amd64.S
new file mode 100644
index 0000000000..5dfcdca979
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha1-avx-bmi2-amd64.S
@@ -0,0 +1,441 @@
+/* sha1-avx-bmi2-amd64.S - Intel AVX/BMI2 accelerated SHA-1 transform function
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Based on sha1.c:
+ * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Intel AVX/BMI2 accelerated SHA-1 implementation, based on the SSSE3 white paper:
+ * "Improving the Performance of the Secure Hash Algorithm (SHA-1)"
+ * http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
+ */
+
+#ifdef __x86_64__
+#include <config.h>
+
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+ defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1)
+
+#include "asm-common-amd64.h"
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+.text
+.align 16
+.Lbswap_shufb_ctl:
+ .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+
+.LK1: .long 0x5A827999
+.LK2: .long 0x6ED9EBA1
+.LK3: .long 0x8F1BBCDC
+.LK4: .long 0xCA62C1D6
+
+
+/* Register macros */
+
+#define RSTATE %r8
+#define RDATA %r9
+#define ROLDSTACK %r10
+#define RNBLKS %r11
+
+#define a %esi
+#define b %edi
+#define c %ebp
+#define d %edx
+#define e %ecx
+#define ne %ebx
+
+#define RT0 %eax
+#define RT1 %r12d
+
+#define Wtmp0 %xmm0
+#define Wtmp1 %xmm1
+
+#define W0 %xmm2
+#define W1 %xmm3
+#define W2 %xmm4
+#define W3 %xmm5
+#define W4 %xmm6
+#define W5 %xmm7
+#define W6 %xmm8
+#define W7 %xmm9
+
+#define BSWAP_REG %xmm10
+
+#define K1 %xmm11
+#define K2 %xmm12
+#define K3 %xmm13
+#define K4 %xmm14
+
+
+/* Round function macros. */
+
+#define WK(i) (((i) & 15) * 4)(%rsp)
+
+#define R_F1(a,b,c,d,e,i) \
+ movl c, RT0; \
+ andn d, b, RT1; \
+ addl WK(i), e; \
+ andl b, RT0; \
+ rorxl $2, b, b; \
+ addl RT1, e; \
+ addl ne, a; \
+ leal (RT0,e), ne; \
+ rorxl $27, a, e;
+
+#define R_F2(a,b,c,d,e,i) \
+ movl c, RT0; \
+ addl WK(i), e; \
+ xorl b, RT0; \
+ rorxl $2, b, b; \
+ xorl d, RT0; \
+ addl ne, a; \
+ leal (RT0,e), ne; \
+ rorxl $27, a, e;
+
+#define R_F3(a,b,c,d,e,i) \
+ movl c, RT0; \
+ movl b, RT1; \
+ addl WK(i), e; \
+ xorl b, RT0; \
+ andl c, RT1; \
+ andl d, RT0; \
+ addl RT1, e; \
+ rorxl $2, b, b; \
+ addl ne, a; \
+ leal (RT0,e), ne; \
+ rorxl $27, a, e;
+
+#define R_F4(a,b,c,d,e,i) R_F2(a,b,c,d,e,i)
+
+#define R(a,b,c,d,e,f,i) \
+ R_##f(a,b,c,d,e,i)
+
+
+/* Input expansion macros. */
+
+#define W_PRECALC_00_15_0(i, W, tmp0) \
+ vmovdqu (4*(i))(RDATA), tmp0;
+
+#define W_PRECALC_00_15_1(i, W, tmp0) \
+ vpshufb BSWAP_REG, tmp0, W;
+
+#define W_PRECALC_00_15_2(i, W, tmp0, K) \
+ vpaddd K, W, tmp0;
+
+#define W_PRECALC_00_15_3(i, W, tmp0) \
+ vmovdqa tmp0, WK(i&~3);
+
+#define W_PRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ vpalignr $8, W_m16, W_m12, W; \
+ vpsrldq $4, W_m04, tmp0; \
+ vpxor W_m08, W, W;
+
+#define W_PRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ vpxor W_m16, tmp0, tmp0; \
+ vpxor tmp0, W, W; \
+ vpslld $1, W, tmp0; \
+ vpslldq $12, W, tmp1; \
+ vpsrld $31, W, W;
+
+#define W_PRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ vpor W, tmp0, tmp0; \
+ vpsrld $30, tmp1, W; \
+ vpslld $2, tmp1, tmp1;
+
+#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1, K) \
+ vpxor W, tmp0, tmp0; \
+ vpxor tmp1, tmp0, W; \
+ vpaddd K, W, tmp0; \
+ vmovdqa tmp0, WK((i)&~3);
+
+#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ vpxor W_m28, W, W; \
+ vpalignr $8, W_m08, W_m04, tmp0;
+
+#define W_PRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ vpxor W_m16, W, W; \
+ vpxor tmp0, W, W;
+
+#define W_PRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ vpsrld $30, W, tmp0; \
+ vpslld $2, W, W;
+
+#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0, K) \
+ vpor W, tmp0, W; \
+ vpaddd K, W, tmp0; \
+ vmovdqa tmp0, WK((i)&~3);
+
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * _gcry_sha1_transform_amd64_avx_bmi2 (void *ctx, const unsigned char *data,
+ * size_t nblks)
+ */
+.globl _gcry_sha1_transform_amd64_avx_bmi2
+ELF(.type _gcry_sha1_transform_amd64_avx_bmi2,@function)
+.align 16
+_gcry_sha1_transform_amd64_avx_bmi2:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: data (64*nblks bytes)
+ * %rdx: nblks
+ */
+ CFI_STARTPROC();
+
+ xorl %eax, %eax;
+ cmpq $0, %rdx;
+ jz .Lret;
+
+ vzeroupper;
+
+ movq %rdx, RNBLKS;
+ movq %rdi, RSTATE;
+ movq %rsi, RDATA;
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %r12;
+ CFI_PUSH(%r12);
+
+ movq %rsp, ROLDSTACK;
+ CFI_DEF_CFA_REGISTER(ROLDSTACK);
+
+ subq $(16*4), %rsp;
+ andq $(~31), %rsp;
+
+ /* Get the values of the chaining variables. */
+ movl state_h0(RSTATE), a;
+ movl state_h1(RSTATE), b;
+ movl state_h2(RSTATE), c;
+ movl state_h3(RSTATE), d;
+ movl state_h4(RSTATE), e;
+ xorl ne, ne;
+
+ vmovdqa .Lbswap_shufb_ctl rRIP, BSWAP_REG;
+ vpbroadcastd .LK1 rRIP, K1;
+ vpbroadcastd .LK2 rRIP, K2;
+ vpbroadcastd .LK3 rRIP, K3;
+ vpbroadcastd .LK4 rRIP, K4;
+
+ /* Precalc 0-15. */
+ W_PRECALC_00_15_0(0, W0, Wtmp0);
+ W_PRECALC_00_15_1(1, W0, Wtmp0);
+ W_PRECALC_00_15_2(2, W0, Wtmp0, K1);
+ W_PRECALC_00_15_3(3, W0, Wtmp0);
+ W_PRECALC_00_15_0(4, W7, Wtmp0);
+ W_PRECALC_00_15_1(5, W7, Wtmp0);
+ W_PRECALC_00_15_2(6, W7, Wtmp0, K1);
+ W_PRECALC_00_15_3(7, W7, Wtmp0);
+ W_PRECALC_00_15_0(8, W6, Wtmp0);
+ W_PRECALC_00_15_1(9, W6, Wtmp0);
+ W_PRECALC_00_15_2(10, W6, Wtmp0, K1);
+ W_PRECALC_00_15_3(11, W6, Wtmp0);
+ W_PRECALC_00_15_0(12, W5, Wtmp0);
+ W_PRECALC_00_15_1(13, W5, Wtmp0);
+ W_PRECALC_00_15_2(14, W5, Wtmp0, K1);
+ W_PRECALC_00_15_3(15, W5, Wtmp0);
+
+.align 8
+.Loop:
+ addq $64, RDATA;
+
+ /* Transform 0-15 + Precalc 16-31. */
+ R( a, b, c, d, e, F1, 0 ); W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( e, a, b, c, d, F1, 1 ); W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( d, e, a, b, c, F1, 2 ); W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( c, d, e, a, b, F1, 3 ); W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, Wtmp1, K1);
+ R( b, c, d, e, a, F1, 4 ); W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( a, b, c, d, e, F1, 5 ); W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( e, a, b, c, d, F1, 6 ); W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( d, e, a, b, c, F1, 7 ); W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, Wtmp1, K2);
+ R( c, d, e, a, b, F1, 8 ); W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( b, c, d, e, a, F1, 9 ); W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( a, b, c, d, e, F1, 10 ); W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( e, a, b, c, d, F1, 11 ); W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, Wtmp1, K2);
+ R( d, e, a, b, c, F1, 12 ); W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ R( c, d, e, a, b, F1, 13 ); W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ R( b, c, d, e, a, F1, 14 ); W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ R( a, b, c, d, e, F1, 15 ); W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, Wtmp1, K2);
+
+ /* Transform 16-63 + Precalc 32-79. */
+ R( e, a, b, c, d, F1, 16 ); W_PRECALC_32_79_0(32, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( d, e, a, b, c, F1, 17 ); W_PRECALC_32_79_1(33, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( c, d, e, a, b, F1, 18 ); W_PRECALC_32_79_2(34, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( b, c, d, e, a, F1, 19 ); W_PRECALC_32_79_3(35, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0, K2);
+ R( a, b, c, d, e, F2, 20 ); W_PRECALC_32_79_0(36, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( e, a, b, c, d, F2, 21 ); W_PRECALC_32_79_1(37, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( d, e, a, b, c, F2, 22 ); W_PRECALC_32_79_2(38, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( c, d, e, a, b, F2, 23 ); W_PRECALC_32_79_3(39, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0, K2);
+ R( b, c, d, e, a, F2, 24 ); W_PRECALC_32_79_0(40, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( a, b, c, d, e, F2, 25 ); W_PRECALC_32_79_1(41, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( e, a, b, c, d, F2, 26 ); W_PRECALC_32_79_2(42, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( d, e, a, b, c, F2, 27 ); W_PRECALC_32_79_3(43, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0, K3);
+ R( c, d, e, a, b, F2, 28 ); W_PRECALC_32_79_0(44, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( b, c, d, e, a, F2, 29 ); W_PRECALC_32_79_1(45, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( a, b, c, d, e, F2, 30 ); W_PRECALC_32_79_2(46, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( e, a, b, c, d, F2, 31 ); W_PRECALC_32_79_3(47, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0, K3);
+ R( d, e, a, b, c, F2, 32 ); W_PRECALC_32_79_0(48, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( c, d, e, a, b, F2, 33 ); W_PRECALC_32_79_1(49, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( b, c, d, e, a, F2, 34 ); W_PRECALC_32_79_2(50, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( a, b, c, d, e, F2, 35 ); W_PRECALC_32_79_3(51, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0, K3);
+ R( e, a, b, c, d, F2, 36 ); W_PRECALC_32_79_0(52, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( d, e, a, b, c, F2, 37 ); W_PRECALC_32_79_1(53, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( c, d, e, a, b, F2, 38 ); W_PRECALC_32_79_2(54, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( b, c, d, e, a, F2, 39 ); W_PRECALC_32_79_3(55, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0, K3);
+ R( a, b, c, d, e, F3, 40 ); W_PRECALC_32_79_0(56, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( e, a, b, c, d, F3, 41 ); W_PRECALC_32_79_1(57, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( d, e, a, b, c, F3, 42 ); W_PRECALC_32_79_2(58, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( c, d, e, a, b, F3, 43 ); W_PRECALC_32_79_3(59, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0, K3);
+ R( b, c, d, e, a, F3, 44 ); W_PRECALC_32_79_0(60, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( a, b, c, d, e, F3, 45 ); W_PRECALC_32_79_1(61, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( e, a, b, c, d, F3, 46 ); W_PRECALC_32_79_2(62, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( d, e, a, b, c, F3, 47 ); W_PRECALC_32_79_3(63, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0, K4);
+ R( c, d, e, a, b, F3, 48 ); W_PRECALC_32_79_0(64, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( b, c, d, e, a, F3, 49 ); W_PRECALC_32_79_1(65, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( a, b, c, d, e, F3, 50 ); W_PRECALC_32_79_2(66, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( e, a, b, c, d, F3, 51 ); W_PRECALC_32_79_3(67, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0, K4);
+ R( d, e, a, b, c, F3, 52 ); W_PRECALC_32_79_0(68, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( c, d, e, a, b, F3, 53 ); W_PRECALC_32_79_1(69, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( b, c, d, e, a, F3, 54 ); W_PRECALC_32_79_2(70, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( a, b, c, d, e, F3, 55 ); W_PRECALC_32_79_3(71, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0, K4);
+ R( e, a, b, c, d, F3, 56 ); W_PRECALC_32_79_0(72, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( d, e, a, b, c, F3, 57 ); W_PRECALC_32_79_1(73, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( c, d, e, a, b, F3, 58 ); W_PRECALC_32_79_2(74, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( b, c, d, e, a, F3, 59 ); W_PRECALC_32_79_3(75, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0, K4);
+ R( a, b, c, d, e, F4, 60 ); W_PRECALC_32_79_0(76, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( e, a, b, c, d, F4, 61 ); W_PRECALC_32_79_1(77, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( d, e, a, b, c, F4, 62 ); W_PRECALC_32_79_2(78, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( c, d, e, a, b, F4, 63 ); W_PRECALC_32_79_3(79, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0, K4);
+
+ decq RNBLKS;
+ jz .Lend;
+
+ /* Transform 64-79 + Precalc 0-15 of next block. */
+ R( b, c, d, e, a, F4, 64 ); W_PRECALC_00_15_0(0, W0, Wtmp0);
+ R( a, b, c, d, e, F4, 65 ); W_PRECALC_00_15_1(1, W0, Wtmp0);
+ R( e, a, b, c, d, F4, 66 ); W_PRECALC_00_15_2(2, W0, Wtmp0, K1);
+ R( d, e, a, b, c, F4, 67 ); W_PRECALC_00_15_3(3, W0, Wtmp0);
+ R( c, d, e, a, b, F4, 68 ); W_PRECALC_00_15_0(4, W7, Wtmp0);
+ R( b, c, d, e, a, F4, 69 ); W_PRECALC_00_15_1(5, W7, Wtmp0);
+ R( a, b, c, d, e, F4, 70 ); W_PRECALC_00_15_2(6, W7, Wtmp0, K1);
+ R( e, a, b, c, d, F4, 71 ); W_PRECALC_00_15_3(7, W7, Wtmp0);
+ R( d, e, a, b, c, F4, 72 ); W_PRECALC_00_15_0(8, W6, Wtmp0);
+ R( c, d, e, a, b, F4, 73 ); W_PRECALC_00_15_1(9, W6, Wtmp0);
+ R( b, c, d, e, a, F4, 74 ); W_PRECALC_00_15_2(10, W6, Wtmp0, K1);
+ R( a, b, c, d, e, F4, 75 ); W_PRECALC_00_15_3(11, W6, Wtmp0);
+ R( e, a, b, c, d, F4, 76 ); W_PRECALC_00_15_0(12, W5, Wtmp0);
+ R( d, e, a, b, c, F4, 77 ); W_PRECALC_00_15_1(13, W5, Wtmp0);
+ R( c, d, e, a, b, F4, 78 );
+ addl state_h0(RSTATE), a; W_PRECALC_00_15_2(14, W5, Wtmp0, K1);
+ R( b, c, d, e, a, F4, 79 ); W_PRECALC_00_15_3(15, W5, Wtmp0);
+ addl ne, a;
+ xorl ne, ne;
+
+ /* Update the chaining variables. */
+ addl state_h3(RSTATE), d;
+ addl state_h2(RSTATE), c;
+ addl state_h1(RSTATE), b;
+ addl state_h4(RSTATE), e;
+
+ movl d, state_h3(RSTATE);
+ movl c, state_h2(RSTATE);
+ movl b, state_h1(RSTATE);
+ movl a, state_h0(RSTATE);
+ movl e, state_h4(RSTATE);
+
+ jmp .Loop;
+
+.align 16
+.Lend:
+ vzeroall;
+
+ /* Transform 64-79 + burn stack */
+ R( b, c, d, e, a, F4, 64 );
+ R( a, b, c, d, e, F4, 65 );
+ R( e, a, b, c, d, F4, 66 );
+ R( d, e, a, b, c, F4, 67 );
+ R( c, d, e, a, b, F4, 68 );
+ R( b, c, d, e, a, F4, 69 );
+ R( a, b, c, d, e, F4, 70 );
+ R( e, a, b, c, d, F4, 71 );
+ R( d, e, a, b, c, F4, 72 );
+ R( c, d, e, a, b, F4, 73 );
+ R( b, c, d, e, a, F4, 74 );
+ R( a, b, c, d, e, F4, 75 );
+ R( e, a, b, c, d, F4, 76 ); vmovdqa %xmm0, (0*16)(%rsp);
+ R( d, e, a, b, c, F4, 77 ); vmovdqa %xmm0, (1*16)(%rsp);
+ R( c, d, e, a, b, F4, 78 ); vmovdqa %xmm0, (2*16)(%rsp);
+ addl state_h0(RSTATE), a;
+ R( b, c, d, e, a, F4, 79 );
+ addl ne, a;
+ xorl ne, ne;
+
+ /* 16*4/16-1 = 3 */
+ vmovdqa %xmm0, (3*16)(%rsp);
+
+ /* Update the chaining variables. */
+ addl state_h3(RSTATE), d;
+ addl state_h2(RSTATE), c;
+ addl state_h1(RSTATE), b;
+ addl state_h4(RSTATE), e;
+
+ movl d, state_h3(RSTATE);
+ movl c, state_h2(RSTATE);
+ movl b, state_h1(RSTATE);
+ movl a, state_h0(RSTATE);
+ movl e, state_h4(RSTATE);
+
+ movq ROLDSTACK, %rsp;
+ CFI_REGISTER(ROLDSTACK, %rsp);
+ CFI_DEF_CFA_REGISTER(%rsp);
+
+ popq %r12;
+ CFI_POP(%r12);
+ popq %rbp;
+ CFI_POP(%rbp);
+ popq %rbx;
+ CFI_POP(%rbx);
+
+ /* stack already burned */
+ xorl %eax, %eax;
+
+.Lret:
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sha1_transform_amd64_avx_bmi2,
+ .-_gcry_sha1_transform_amd64_avx_bmi2;)
+
+#endif
+#endif
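
Besides rorx, which rotates into a separate destination without touching the flags, the BMI2 variants rewrite F1 with andn: the select function F1(b,c,d) = (b & c) | (~b & d) is split into two independent ANDs whose results can simply be added, because the two terms can never have a bit set in common. A minimal sketch of that rewrite (illustrative names only):

#include <stdint.h>

static uint32_t
sha1_f1_add_form (uint32_t b, uint32_t c, uint32_t d)
{
  uint32_t bc  = b & c;    /* movl c, RT0; andl b, RT0 */
  uint32_t nbd = ~b & d;   /* andn d, b, RT1           */
  return bc + nbd;         /* bit-disjoint, so '+' equals '|' */
}

This lets the F1 term be folded into the chain of round additions together with e, W+K and the deferred rol(a,5) that is carried between rounds in the spare 'ne' register.
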
diff --git a/comm/third_party/libgcrypt/cipher/sha1-avx2-bmi2-amd64.S b/comm/third_party/libgcrypt/cipher/sha1-avx2-bmi2-amd64.S
new file mode 100644
index 0000000000..938632305a
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha1-avx2-bmi2-amd64.S
@@ -0,0 +1,573 @@
+/* sha1-avx2-bmi2-amd64.S - Intel AVX2/BMI2 accelerated SHA-1 transform function
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Based on sha1.c:
+ * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Intel AVX2/BMI2 accelerated SHA-1 implementation, based on the SSSE3 white paper:
+ * "Improving the Performance of the Secure Hash Algorithm (SHA-1)"
+ * http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
+ */
+
+#ifdef __x86_64__
+#include <config.h>
+
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(HAVE_GCC_INLINE_ASM_BMI2) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+ defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(USE_SHA1)
+
+#include "asm-common-amd64.h"
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+#define WK_STACK_WORDS (80 * 2)
+
+.text
+.align 16
+.Lbswap_shufb_ctl:
+ .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+
+.LK1: .long 0x5A827999
+.LK2: .long 0x6ED9EBA1
+.LK3: .long 0x8F1BBCDC
+.LK4: .long 0xCA62C1D6
+
+
+/* Register macros */
+
+#define RSTATE %r8
+#define RDATA %r9
+#define ROLDSTACK %r10
+#define RNBLKS %r11
+
+#define a %eax
+#define b %ebx
+#define c %ecx
+#define d %edx
+#define e %edi
+#define ne %r12d
+
+#define RT0 %esi
+#define RT1 %ebp
+
+#define Wtmp0 %ymm0
+#define Wtmp1 %ymm1
+#define Wtmp0x %xmm0
+#define Wtmp1x %xmm1
+
+#define W0 %ymm2
+#define W1 %ymm3
+#define W2 %ymm4
+#define W3 %ymm5
+#define W4 %ymm6
+#define W5 %ymm7
+#define W6 %ymm8
+#define W7 %ymm9
+
+#define BSWAP_REG %ymm10
+
+#define K1 %ymm11
+#define K2 %ymm12
+#define K3 %ymm13
+#define K4 %ymm14
+
+
+/* Round function macros. */
+
+#define WK(i,block) ((block) * 16 + ((i) / 4) * 32 + ((i) % 4) * 4)(%rsp)
+#define PRE_WK(i) ((i) * 4 * 2)(%rsp)
+
+#define R_F1(a,b,c,d,e,i,block) \
+ movl c, RT0; \
+ andn d, b, RT1; \
+ addl WK(i,block), e; \
+ andl b, RT0; \
+ leal (a,ne), a; \
+ rorxl $2, b, b; \
+ addl RT1, e; \
+ rorxl $27, a, ne; \
+ addl RT0, e;
+
+#define R_F2(a,b,c,d,e,i,block) \
+ addl WK(i,block), e; \
+ movl c, RT0; \
+ xorl b, RT0; \
+ leal (a,ne), a; \
+ rorxl $2, b, b; \
+ xorl d, RT0; \
+ addl RT0, e; \
+ rorxl $27, a, ne;
+
+#define R_F3(a,b,c,d,e,i,block) \
+ movl c, RT0; \
+ addl WK(i,block), e; \
+ movl b, RT1; \
+ xorl b, RT0; \
+ leal (a,ne), a; \
+ rorxl $2, b, b; \
+ andl c, RT1; \
+ addl RT1, e; \
+ andl d, RT0; \
+ rorxl $27, a, ne; \
+ addl RT0, e;
+
+#define R_F4(a,b,c,d,e,i,block) R_F2(a,b,c,d,e,i,block)
+
+#define R(a,b,c,d,e,f,i,block) \
+ R_##f(a,b,c,d,e,i,block)
+
+
+/* Input expansion macros. */
+
+#define W_PRECALC_00_15_0(i, W, tmp0) \
+ vmovdqu (4*(i))(RDATA), tmp0##x; \
+ vinserti128 $1, (4*(i) + 64)(RDATA), tmp0, tmp0;
+
+#define W_PRECALC_00_15_1(i, W, tmp0) \
+ vpshufb BSWAP_REG, tmp0, W;
+
+#define W_PRECALC_00_15_2(i, W, tmp0, K) \
+ vpaddd K, W, tmp0;
+
+#define W_PRECALC_00_15_3(i, W, tmp0) \
+ vmovdqa tmp0, PRE_WK((i)&~3);
+
+#define W_PRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ vpalignr $8, W_m16, W_m12, W; \
+ vpsrldq $4, W_m04, tmp0; \
+ vpxor W_m08, W, W;
+
+#define W_PRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ vpxor W_m16, tmp0, tmp0; \
+ vpxor tmp0, W, W; \
+ vpslld $1, W, tmp0; \
+ vpslldq $12, W, tmp1; \
+ vpsrld $31, W, W;
+
+#define W_PRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ vpor W, tmp0, tmp0; \
+ vpsrld $30, tmp1, W; \
+ vpslld $2, tmp1, tmp1;
+
+#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1, K) \
+ vpxor W, tmp0, tmp0; \
+ vpxor tmp1, tmp0, W; \
+ vpaddd K, W, tmp0; \
+ vmovdqa tmp0, PRE_WK((i)&~3);
+
+#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ vpxor W_m28, W, W; \
+ vpalignr $8, W_m08, W_m04, tmp0;
+
+#define W_PRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ vpxor W_m16, W, W; \
+ vpxor tmp0, W, W;
+
+#define W_PRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ vpsrld $30, W, tmp0; \
+ vpslld $2, W, W;
+
+#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0, K) \
+ vpor W, tmp0, W; \
+ vpaddd K, W, tmp0; \
+ vmovdqa tmp0, PRE_WK((i)&~3);
+
+
+/*
+ * Transform 2*nblks*64 bytes (2*nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * _gcry_sha1_transform_amd64_avx2_bmi2 (void *ctx, const unsigned char *data,
+ * size_t nblks)
+ */
+.globl _gcry_sha1_transform_amd64_avx2_bmi2
+ELF(.type _gcry_sha1_transform_amd64_avx2_bmi2,@function)
+.align 16
+_gcry_sha1_transform_amd64_avx2_bmi2:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: data (64*nblks bytes)
+ * %rdx: nblks (multiple of 2, larger than 0)
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ movq %rdx, RNBLKS;
+ movq %rdi, RSTATE;
+ movq %rsi, RDATA;
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ pushq %r12;
+ CFI_PUSH(%r12);
+
+ movq %rsp, ROLDSTACK;
+ CFI_DEF_CFA_REGISTER(ROLDSTACK);
+
+ subq $(WK_STACK_WORDS*4), %rsp;
+ andq $(~63), %rsp;
+
+ /* Get the values of the chaining variables. */
+ movl state_h0(RSTATE), a;
+ movl state_h1(RSTATE), b;
+ movl state_h2(RSTATE), c;
+ movl state_h3(RSTATE), d;
+ movl state_h4(RSTATE), e;
+ xorl ne, ne;
+
+ vbroadcasti128 .Lbswap_shufb_ctl rRIP, BSWAP_REG;
+ vpbroadcastd .LK1 rRIP, K1;
+ vpbroadcastd .LK2 rRIP, K2;
+ vpbroadcastd .LK3 rRIP, K3;
+ vpbroadcastd .LK4 rRIP, K4;
+
+ /* Precalc 0-31 for block 1 & 2. */
+ W_PRECALC_00_15_0(0, W0, Wtmp0);
+ W_PRECALC_00_15_1(1, W0, Wtmp0);
+ W_PRECALC_00_15_2(2, W0, Wtmp0, K1);
+ W_PRECALC_00_15_3(3, W0, Wtmp0);
+ W_PRECALC_00_15_0(4, W7, Wtmp0);
+ W_PRECALC_00_15_1(5, W7, Wtmp0);
+ W_PRECALC_00_15_2(6, W7, Wtmp0, K1);
+ W_PRECALC_00_15_3(7, W7, Wtmp0);
+ W_PRECALC_00_15_0(8, W6, Wtmp0);
+ W_PRECALC_00_15_1(9, W6, Wtmp0);
+ W_PRECALC_00_15_2(10, W6, Wtmp0, K1);
+ W_PRECALC_00_15_3(11, W6, Wtmp0);
+ W_PRECALC_00_15_0(12, W5, Wtmp0);
+ W_PRECALC_00_15_1(13, W5, Wtmp0);
+ W_PRECALC_00_15_2(14, W5, Wtmp0, K1);
+ W_PRECALC_00_15_3(15, W5, Wtmp0);
+ W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, Wtmp1, K1);
+ W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, Wtmp1, K2);
+ W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, Wtmp1, K2);
+ W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, Wtmp1, K2);
+
+.align 8
+.Loop:
+ addq $(2 * 64), RDATA;
+
+ /* Transform 0-15 for block 1 + Precalc 32-47 for block 1 & 2. */
+ R( a, b, c, d, e, F1, 0, 0 ); W_PRECALC_32_79_0(32, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( e, a, b, c, d, F1, 1, 0 ); W_PRECALC_32_79_1(33, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( d, e, a, b, c, F1, 2, 0 ); W_PRECALC_32_79_2(34, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( c, d, e, a, b, F1, 3, 0 ); W_PRECALC_32_79_3(35, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0, K2);
+ R( b, c, d, e, a, F1, 4, 0 ); W_PRECALC_32_79_0(36, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( a, b, c, d, e, F1, 5, 0 ); W_PRECALC_32_79_1(37, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( e, a, b, c, d, F1, 6, 0 ); W_PRECALC_32_79_2(38, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( d, e, a, b, c, F1, 7, 0 ); W_PRECALC_32_79_3(39, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0, K2);
+ R( c, d, e, a, b, F1, 8, 0 ); W_PRECALC_32_79_0(40, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( b, c, d, e, a, F1, 9, 0 ); W_PRECALC_32_79_1(41, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( a, b, c, d, e, F1, 10, 0 ); W_PRECALC_32_79_2(42, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( e, a, b, c, d, F1, 11, 0 ); W_PRECALC_32_79_3(43, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0, K3);
+ R( d, e, a, b, c, F1, 12, 0 ); W_PRECALC_32_79_0(44, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( c, d, e, a, b, F1, 13, 0 ); W_PRECALC_32_79_1(45, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( b, c, d, e, a, F1, 14, 0 ); W_PRECALC_32_79_2(46, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( a, b, c, d, e, F1, 15, 0 ); W_PRECALC_32_79_3(47, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0, K3);
+
+ /* Transform 16-47 for block 1 + Precalc 48-79 for block 1 & 2. */
+ R( e, a, b, c, d, F1, 16, 0 ); W_PRECALC_32_79_0(48, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( d, e, a, b, c, F1, 17, 0 ); W_PRECALC_32_79_1(49, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( c, d, e, a, b, F1, 18, 0 ); W_PRECALC_32_79_2(50, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( b, c, d, e, a, F1, 19, 0 ); W_PRECALC_32_79_3(51, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0, K3);
+ R( a, b, c, d, e, F2, 20, 0 ); W_PRECALC_32_79_0(52, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( e, a, b, c, d, F2, 21, 0 ); W_PRECALC_32_79_1(53, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( d, e, a, b, c, F2, 22, 0 ); W_PRECALC_32_79_2(54, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( c, d, e, a, b, F2, 23, 0 ); W_PRECALC_32_79_3(55, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0, K3);
+ R( b, c, d, e, a, F2, 24, 0 ); W_PRECALC_32_79_0(56, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( a, b, c, d, e, F2, 25, 0 ); W_PRECALC_32_79_1(57, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( e, a, b, c, d, F2, 26, 0 ); W_PRECALC_32_79_2(58, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( d, e, a, b, c, F2, 27, 0 ); W_PRECALC_32_79_3(59, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0, K3);
+ R( c, d, e, a, b, F2, 28, 0 ); W_PRECALC_32_79_0(60, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( b, c, d, e, a, F2, 29, 0 ); W_PRECALC_32_79_1(61, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( a, b, c, d, e, F2, 30, 0 ); W_PRECALC_32_79_2(62, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( e, a, b, c, d, F2, 31, 0 ); W_PRECALC_32_79_3(63, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0, K4);
+ R( d, e, a, b, c, F2, 32, 0 ); W_PRECALC_32_79_0(64, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( c, d, e, a, b, F2, 33, 0 ); W_PRECALC_32_79_1(65, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( b, c, d, e, a, F2, 34, 0 ); W_PRECALC_32_79_2(66, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( a, b, c, d, e, F2, 35, 0 ); W_PRECALC_32_79_3(67, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0, K4);
+ R( e, a, b, c, d, F2, 36, 0 ); W_PRECALC_32_79_0(68, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( d, e, a, b, c, F2, 37, 0 ); W_PRECALC_32_79_1(69, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( c, d, e, a, b, F2, 38, 0 ); W_PRECALC_32_79_2(70, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( b, c, d, e, a, F2, 39, 0 ); W_PRECALC_32_79_3(71, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0, K4);
+ R( a, b, c, d, e, F3, 40, 0 ); W_PRECALC_32_79_0(72, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( e, a, b, c, d, F3, 41, 0 ); W_PRECALC_32_79_1(73, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( d, e, a, b, c, F3, 42, 0 ); W_PRECALC_32_79_2(74, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( c, d, e, a, b, F3, 43, 0 ); W_PRECALC_32_79_3(75, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0, K4);
+ R( b, c, d, e, a, F3, 44, 0 ); W_PRECALC_32_79_0(76, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( a, b, c, d, e, F3, 45, 0 ); W_PRECALC_32_79_1(77, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( e, a, b, c, d, F3, 46, 0 ); W_PRECALC_32_79_2(78, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( d, e, a, b, c, F3, 47, 0 ); W_PRECALC_32_79_3(79, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0, K4);
+
+ /* Transform 48-79 for block 1. */
+ R( c, d, e, a, b, F3, 48, 0 );
+ R( b, c, d, e, a, F3, 49, 0 );
+ R( a, b, c, d, e, F3, 50, 0 );
+ R( e, a, b, c, d, F3, 51, 0 );
+ R( d, e, a, b, c, F3, 52, 0 );
+ R( c, d, e, a, b, F3, 53, 0 );
+ R( b, c, d, e, a, F3, 54, 0 );
+ R( a, b, c, d, e, F3, 55, 0 );
+ R( e, a, b, c, d, F3, 56, 0 );
+ R( d, e, a, b, c, F3, 57, 0 );
+ R( c, d, e, a, b, F3, 58, 0 );
+ R( b, c, d, e, a, F3, 59, 0 );
+ R( a, b, c, d, e, F4, 60, 0 );
+ R( e, a, b, c, d, F4, 61, 0 );
+ R( d, e, a, b, c, F4, 62, 0 );
+ R( c, d, e, a, b, F4, 63, 0 );
+ R( b, c, d, e, a, F4, 64, 0 );
+ R( a, b, c, d, e, F4, 65, 0 );
+ R( e, a, b, c, d, F4, 66, 0 );
+ R( d, e, a, b, c, F4, 67, 0 );
+ R( c, d, e, a, b, F4, 68, 0 );
+ R( b, c, d, e, a, F4, 69, 0 );
+ R( a, b, c, d, e, F4, 70, 0 );
+ R( e, a, b, c, d, F4, 71, 0 );
+ R( d, e, a, b, c, F4, 72, 0 );
+ R( c, d, e, a, b, F4, 73, 0 );
+ R( b, c, d, e, a, F4, 74, 0 );
+ R( a, b, c, d, e, F4, 75, 0 );
+ R( e, a, b, c, d, F4, 76, 0 );
+ R( d, e, a, b, c, F4, 77, 0 );
+ R( c, d, e, a, b, F4, 78, 0 );
+ addl state_h0(RSTATE), a;
+ R( b, c, d, e, a, F4, 79, 0 );
+ addl ne, a;
+ xorl ne, ne;
+
+ /* Update the chaining variables. */
+ addl state_h3(RSTATE), d;
+ addl state_h2(RSTATE), c;
+ addl state_h1(RSTATE), b;
+ addl state_h4(RSTATE), e;
+
+ movl d, state_h3(RSTATE);
+ movl c, state_h2(RSTATE);
+ movl b, state_h1(RSTATE);
+ movl a, state_h0(RSTATE);
+ movl e, state_h4(RSTATE);
+
+ /* Transform 0-47 for block 2. */
+ R( a, b, c, d, e, F1, 0, 1 );
+ R( e, a, b, c, d, F1, 1, 1 );
+ R( d, e, a, b, c, F1, 2, 1 );
+ R( c, d, e, a, b, F1, 3, 1 );
+ R( b, c, d, e, a, F1, 4, 1 );
+ R( a, b, c, d, e, F1, 5, 1 );
+ R( e, a, b, c, d, F1, 6, 1 );
+ R( d, e, a, b, c, F1, 7, 1 );
+ R( c, d, e, a, b, F1, 8, 1 );
+ R( b, c, d, e, a, F1, 9, 1 );
+ R( a, b, c, d, e, F1, 10, 1 );
+ R( e, a, b, c, d, F1, 11, 1 );
+ R( d, e, a, b, c, F1, 12, 1 );
+ R( c, d, e, a, b, F1, 13, 1 );
+ R( b, c, d, e, a, F1, 14, 1 );
+ R( a, b, c, d, e, F1, 15, 1 );
+ R( e, a, b, c, d, F1, 16, 1 );
+ R( d, e, a, b, c, F1, 17, 1 );
+ R( c, d, e, a, b, F1, 18, 1 );
+ R( b, c, d, e, a, F1, 19, 1 );
+ R( a, b, c, d, e, F2, 20, 1 );
+ R( e, a, b, c, d, F2, 21, 1 );
+ R( d, e, a, b, c, F2, 22, 1 );
+ R( c, d, e, a, b, F2, 23, 1 );
+ R( b, c, d, e, a, F2, 24, 1 );
+ R( a, b, c, d, e, F2, 25, 1 );
+ R( e, a, b, c, d, F2, 26, 1 );
+ R( d, e, a, b, c, F2, 27, 1 );
+ R( c, d, e, a, b, F2, 28, 1 );
+ R( b, c, d, e, a, F2, 29, 1 );
+ R( a, b, c, d, e, F2, 30, 1 );
+ R( e, a, b, c, d, F2, 31, 1 );
+ R( d, e, a, b, c, F2, 32, 1 );
+ R( c, d, e, a, b, F2, 33, 1 );
+ R( b, c, d, e, a, F2, 34, 1 );
+ R( a, b, c, d, e, F2, 35, 1 );
+ R( e, a, b, c, d, F2, 36, 1 );
+ R( d, e, a, b, c, F2, 37, 1 );
+ R( c, d, e, a, b, F2, 38, 1 );
+ R( b, c, d, e, a, F2, 39, 1 );
+ R( a, b, c, d, e, F3, 40, 1 );
+ R( e, a, b, c, d, F3, 41, 1 );
+ R( d, e, a, b, c, F3, 42, 1 );
+ R( c, d, e, a, b, F3, 43, 1 );
+ R( b, c, d, e, a, F3, 44, 1 );
+ R( a, b, c, d, e, F3, 45, 1 );
+ R( e, a, b, c, d, F3, 46, 1 );
+ R( d, e, a, b, c, F3, 47, 1 );
+
+ addq $-2, RNBLKS;
+ jz .Lend;
+
+ /* Transform 48-79 for block 2 + Precalc 0-31 for next two blocks. */
+ R( c, d, e, a, b, F3, 48, 1 ); W_PRECALC_00_15_0(0, W0, Wtmp0);
+ R( b, c, d, e, a, F3, 49, 1 ); W_PRECALC_00_15_1(1, W0, Wtmp0);
+ R( a, b, c, d, e, F3, 50, 1 ); W_PRECALC_00_15_2(2, W0, Wtmp0, K1);
+ R( e, a, b, c, d, F3, 51, 1 ); W_PRECALC_00_15_3(3, W0, Wtmp0);
+ R( d, e, a, b, c, F3, 52, 1 ); W_PRECALC_00_15_0(4, W7, Wtmp0);
+ R( c, d, e, a, b, F3, 53, 1 ); W_PRECALC_00_15_1(5, W7, Wtmp0);
+ R( b, c, d, e, a, F3, 54, 1 ); W_PRECALC_00_15_2(6, W7, Wtmp0, K1);
+ R( a, b, c, d, e, F3, 55, 1 ); W_PRECALC_00_15_3(7, W7, Wtmp0);
+ R( e, a, b, c, d, F3, 56, 1 ); W_PRECALC_00_15_0(8, W6, Wtmp0);
+ R( d, e, a, b, c, F3, 57, 1 ); W_PRECALC_00_15_1(9, W6, Wtmp0);
+ R( c, d, e, a, b, F3, 58, 1 ); W_PRECALC_00_15_2(10, W6, Wtmp0, K1);
+ R( b, c, d, e, a, F3, 59, 1 ); W_PRECALC_00_15_3(11, W6, Wtmp0);
+ R( a, b, c, d, e, F4, 60, 1 ); W_PRECALC_00_15_0(12, W5, Wtmp0);
+ R( e, a, b, c, d, F4, 61, 1 ); W_PRECALC_00_15_1(13, W5, Wtmp0);
+ R( d, e, a, b, c, F4, 62, 1 ); W_PRECALC_00_15_2(14, W5, Wtmp0, K1);
+ R( c, d, e, a, b, F4, 63, 1 ); W_PRECALC_00_15_3(15, W5, Wtmp0);
+ R( b, c, d, e, a, F4, 64, 1 ); W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( a, b, c, d, e, F4, 65, 1 ); W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( e, a, b, c, d, F4, 66, 1 ); W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( d, e, a, b, c, F4, 67, 1 ); W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, Wtmp1, K1);
+ R( c, d, e, a, b, F4, 68, 1 ); W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( b, c, d, e, a, F4, 69, 1 ); W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( a, b, c, d, e, F4, 70, 1 ); W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( e, a, b, c, d, F4, 71, 1 ); W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, Wtmp1, K2);
+ R( d, e, a, b, c, F4, 72, 1 ); W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( c, d, e, a, b, F4, 73, 1 ); W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( b, c, d, e, a, F4, 74, 1 ); W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( a, b, c, d, e, F4, 75, 1 ); W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, Wtmp1, K2);
+ R( e, a, b, c, d, F4, 76, 1 ); W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ R( d, e, a, b, c, F4, 77, 1 ); W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ R( c, d, e, a, b, F4, 78, 1 ); W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ addl state_h0(RSTATE), a; W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, Wtmp1, K2);
+ R( b, c, d, e, a, F4, 79, 1 );
+ addl ne, a;
+ xorl ne, ne;
+
+ /* Update the chaining variables. */
+ addl state_h3(RSTATE), d;
+ addl state_h2(RSTATE), c;
+ addl state_h1(RSTATE), b;
+ addl state_h4(RSTATE), e;
+
+ movl d, state_h3(RSTATE);
+ movl c, state_h2(RSTATE);
+ movl b, state_h1(RSTATE);
+ movl a, state_h0(RSTATE);
+ movl e, state_h4(RSTATE);
+
+ jmp .Loop;
+
+.align 16
+.Lend:
+ vzeroall;
+
+ /* Transform 48-79 for block 2 + burn stack */
+ R( c, d, e, a, b, F3, 48, 1 );
+ R( b, c, d, e, a, F3, 49, 1 );
+ R( a, b, c, d, e, F3, 50, 1 );
+ R( e, a, b, c, d, F3, 51, 1 );
+ R( d, e, a, b, c, F3, 52, 1 );
+ R( c, d, e, a, b, F3, 53, 1 );
+ R( b, c, d, e, a, F3, 54, 1 );
+ R( a, b, c, d, e, F3, 55, 1 );
+ R( e, a, b, c, d, F3, 56, 1 );
+ R( d, e, a, b, c, F3, 57, 1 );
+ R( c, d, e, a, b, F3, 58, 1 );
+ R( b, c, d, e, a, F3, 59, 1 );
+ R( a, b, c, d, e, F4, 60, 1 ); vmovdqa %ymm0, (0*32)(%rsp);
+ R( e, a, b, c, d, F4, 61, 1 ); vmovdqa %ymm0, (1*32)(%rsp);
+ R( d, e, a, b, c, F4, 62, 1 ); vmovdqa %ymm0, (2*32)(%rsp);
+ R( c, d, e, a, b, F4, 63, 1 ); vmovdqa %ymm0, (3*32)(%rsp);
+ R( b, c, d, e, a, F4, 64, 1 ); vmovdqa %ymm0, (4*32)(%rsp);
+ R( a, b, c, d, e, F4, 65, 1 ); vmovdqa %ymm0, (5*32)(%rsp);
+ R( e, a, b, c, d, F4, 66, 1 ); vmovdqa %ymm0, (6*32)(%rsp);
+ R( d, e, a, b, c, F4, 67, 1 ); vmovdqa %ymm0, (7*32)(%rsp);
+ R( c, d, e, a, b, F4, 68, 1 ); vmovdqa %ymm0, (8*32)(%rsp);
+ R( b, c, d, e, a, F4, 69, 1 ); vmovdqa %ymm0, (9*32)(%rsp);
+ R( a, b, c, d, e, F4, 70, 1 ); vmovdqa %ymm0, (10*32)(%rsp);
+ R( e, a, b, c, d, F4, 71, 1 ); vmovdqa %ymm0, (11*32)(%rsp);
+ R( d, e, a, b, c, F4, 72, 1 ); vmovdqa %ymm0, (12*32)(%rsp);
+ R( c, d, e, a, b, F4, 73, 1 ); vmovdqa %ymm0, (13*32)(%rsp);
+ R( b, c, d, e, a, F4, 74, 1 ); vmovdqa %ymm0, (14*32)(%rsp);
+ R( a, b, c, d, e, F4, 75, 1 ); vmovdqa %ymm0, (15*32)(%rsp);
+ R( e, a, b, c, d, F4, 76, 1 ); vmovdqa %ymm0, (16*32)(%rsp);
+ R( d, e, a, b, c, F4, 77, 1 ); vmovdqa %ymm0, (17*32)(%rsp);
+ R( c, d, e, a, b, F4, 78, 1 ); vmovdqa %ymm0, (18*32)(%rsp);
+ addl state_h0(RSTATE), a;
+ R( b, c, d, e, a, F4, 79, 1 );
+ addl ne, a;
+ xorl ne, ne;
+
+ /* WK_STACK_WORDS*4/32-1 = 19 */
+ vmovdqa %ymm0, (19*32)(%rsp);
+
+ /* Update the chaining variables. */
+ addl state_h3(RSTATE), d;
+ addl state_h2(RSTATE), c;
+ addl state_h1(RSTATE), b;
+ addl state_h4(RSTATE), e;
+
+ movl d, state_h3(RSTATE);
+ movl c, state_h2(RSTATE);
+ movl b, state_h1(RSTATE);
+ movl a, state_h0(RSTATE);
+ movl e, state_h4(RSTATE);
+
+ movq ROLDSTACK, %rsp;
+ CFI_REGISTER(ROLDSTACK, %rsp);
+ CFI_DEF_CFA_REGISTER(%rsp);
+
+ popq %r12;
+ CFI_POP(%r12);
+ popq %rbp;
+ CFI_POP(%rbp);
+ popq %rbx;
+ CFI_POP(%rbx);
+
+ /* stack already burned */
+ xorl %eax, %eax;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sha1_transform_amd64_avx2_bmi2,
+ .-_gcry_sha1_transform_amd64_avx2_bmi2;)
+
+#endif
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/sha1-intel-shaext.c b/comm/third_party/libgcrypt/cipher/sha1-intel-shaext.c
new file mode 100644
index 0000000000..ddf2be2aa1
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha1-intel-shaext.c
@@ -0,0 +1,292 @@
+/* sha1-intel-shaext.S - SHAEXT accelerated SHA-1 transform function
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "types.h"
+
+#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
+ defined(HAVE_GCC_INLINE_ASM_SSE41) && defined(USE_SHA1) && \
+ defined(ENABLE_SHAEXT_SUPPORT)
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+# pragma GCC target("no-sse")
+#endif
+#if __clang__
+# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function)
+#endif
+
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+
+/* Two macros to be called prior to and after the use of SHA-EXT
+   instructions.  There should be no external function calls between
+   the use of these macros.  Their purpose is to make sure that the
+   SSE registers are cleared and won't reveal any information about
+   the key or the data.  */
+#ifdef __WIN64__
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define shaext_prepare_variable char win64tmp[2*16]
+# define shaext_prepare_variable_size sizeof(win64tmp)
+# define shaext_prepare() \
+ do { asm volatile ("movdqu %%xmm6, (%0)\n" \
+ "movdqu %%xmm7, (%1)\n" \
+ : \
+ : "r" (&win64tmp[0]), "r" (&win64tmp[16]) \
+ : "memory"); \
+ } while (0)
+# define shaext_cleanup(tmp0,tmp1) \
+ do { asm volatile ("movdqu (%0), %%xmm6\n" \
+ "movdqu (%1), %%xmm7\n" \
+ "pxor %%xmm0, %%xmm0\n" \
+ "pxor %%xmm1, %%xmm1\n" \
+ "pxor %%xmm2, %%xmm2\n" \
+ "pxor %%xmm3, %%xmm3\n" \
+ "pxor %%xmm4, %%xmm4\n" \
+ "pxor %%xmm5, %%xmm5\n" \
+ "movdqa %%xmm0, (%2)\n\t" \
+ "movdqa %%xmm0, (%3)\n\t" \
+ : \
+ : "r" (&win64tmp[0]), "r" (&win64tmp[16]), \
+ "r" (tmp0), "r" (tmp1) \
+ : "memory"); \
+ } while (0)
+#else
+# define shaext_prepare_variable
+# define shaext_prepare_variable_size 0
+# define shaext_prepare() do { } while (0)
+# define shaext_cleanup(tmp0,tmp1) \
+ do { asm volatile ("pxor %%xmm0, %%xmm0\n" \
+ "pxor %%xmm1, %%xmm1\n" \
+ "pxor %%xmm2, %%xmm2\n" \
+ "pxor %%xmm3, %%xmm3\n" \
+ "pxor %%xmm4, %%xmm4\n" \
+ "pxor %%xmm5, %%xmm5\n" \
+ "pxor %%xmm6, %%xmm6\n" \
+ "pxor %%xmm7, %%xmm7\n" \
+ "movdqa %%xmm0, (%0)\n\t" \
+ "movdqa %%xmm0, (%1)\n\t" \
+ : \
+ : "r" (tmp0), "r" (tmp1) \
+ : "memory"); \
+ } while (0)
+#endif
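+
+/* Intended usage pattern (a sketch mirroring the transform function below):
+ *
+ *   shaext_prepare_variable;       declares the WIN64 spill area, if any
+ *   shaext_prepare ();             saves XMM6/XMM7 on WIN64
+ *   ... inline asm using the SHA-EXT instructions ...
+ *   shaext_cleanup (tmp0, tmp1);   restores XMM6/XMM7 on WIN64, clears the
+ *                                  remaining XMM registers (all of
+ *                                  XMM0..XMM7 elsewhere) and wipes the two
+ *                                  16-byte buffers at tmp0/tmp1
+ */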
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ */
+unsigned int ASM_FUNC_ATTR
+_gcry_sha1_transform_intel_shaext(void *state, const unsigned char *data,
+ size_t nblks)
+{
+ static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+ char save_buf[2 * 16 + 15];
+ char *abcd_save;
+ char *e_save;
+ shaext_prepare_variable;
+
+ if (nblks == 0)
+ return 0;
+
+ shaext_prepare ();
+
+ asm volatile ("" : "=r" (abcd_save) : "0" (save_buf) : "memory");
+ abcd_save = abcd_save + (-(uintptr_t)abcd_save & 15);
+ e_save = abcd_save + 16;
+
+ /* byteswap mask => XMM7 */
+ asm volatile ("movdqa %[mask], %%xmm7\n\t" /* Preload mask */
+ :
+ : [mask] "m" (*be_mask)
+ : "memory");
+
+ /* Load state.. ABCD => XMM4, E => XMM5 */
+ asm volatile ("movd 16(%[state]), %%xmm5\n\t"
+ "movdqu (%[state]), %%xmm4\n\t"
+ "pslldq $12, %%xmm5\n\t"
+ "pshufd $0x1b, %%xmm4, %%xmm4\n\t"
+ "movdqa %%xmm5, (%[e_save])\n\t"
+ "movdqa %%xmm4, (%[abcd_save])\n\t"
+ :
+ : [state] "r" (state), [abcd_save] "r" (abcd_save),
+ [e_save] "r" (e_save)
+ : "memory" );
+
+  /* DATA => XMM[0..3] */
+ asm volatile ("movdqu 0(%[data]), %%xmm0\n\t"
+ "movdqu 16(%[data]), %%xmm1\n\t"
+ "movdqu 32(%[data]), %%xmm2\n\t"
+ "movdqu 48(%[data]), %%xmm3\n\t"
+ "pshufb %%xmm7, %%xmm0\n\t"
+ "pshufb %%xmm7, %%xmm1\n\t"
+ "pshufb %%xmm7, %%xmm2\n\t"
+ "pshufb %%xmm7, %%xmm3\n\t"
+ :
+ : [data] "r" (data)
+ : "memory" );
+ data += 64;
+
+ while (1)
+ {
+ /* Round 0..3 */
+ asm volatile ("paddd %%xmm0, %%xmm5\n\t"
+ "movdqa %%xmm4, %%xmm6\n\t" /* ABCD => E1 */
+ "sha1rnds4 $0, %%xmm5, %%xmm4\n\t"
+ ::: "memory" );
+
+ /* Round 4..7 */
+ asm volatile ("sha1nexte %%xmm1, %%xmm6\n\t"
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "sha1rnds4 $0, %%xmm6, %%xmm4\n\t"
+ "sha1msg1 %%xmm1, %%xmm0\n\t"
+ ::: "memory" );
+
+ /* Round 8..11 */
+ asm volatile ("sha1nexte %%xmm2, %%xmm5\n\t"
+ "movdqa %%xmm4, %%xmm6\n\t"
+ "sha1rnds4 $0, %%xmm5, %%xmm4\n\t"
+ "sha1msg1 %%xmm2, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm0\n\t"
+ ::: "memory" );
+
+#define ROUND(imm, E0, E1, MSG0, MSG1, MSG2, MSG3) \
+ asm volatile ("sha1nexte %%"MSG0", %%"E0"\n\t" \
+ "movdqa %%xmm4, %%"E1"\n\t" \
+ "sha1msg2 %%"MSG0", %%"MSG1"\n\t" \
+ "sha1rnds4 $"imm", %%"E0", %%xmm4\n\t" \
+ "sha1msg1 %%"MSG0", %%"MSG3"\n\t" \
+ "pxor %%"MSG0", %%"MSG2"\n\t" \
+ ::: "memory" )
+
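+      /* Each ROUND invocation below covers four SHA-1 rounds: sha1nexte
+       * folds the running E value into the next four message words,
+       * sha1rnds4 performs the rounds (its immediate selects the round
+       * constant: 0 => 0x5A827999, 1 => 0x6ED9EBA1, 2 => 0x8F1BBCDC,
+       * 3 => 0xCA62C1D6) and sha1msg1/sha1msg2/pxor advance the message
+       * schedule. */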
+ /* Rounds 12..15 to 64..67 */
+ ROUND("0", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2");
+ ROUND("0", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3");
+ ROUND("1", "xmm6", "xmm5", "xmm1", "xmm2", "xmm3", "xmm0");
+ ROUND("1", "xmm5", "xmm6", "xmm2", "xmm3", "xmm0", "xmm1");
+ ROUND("1", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2");
+ ROUND("1", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3");
+ ROUND("1", "xmm6", "xmm5", "xmm1", "xmm2", "xmm3", "xmm0");
+ ROUND("2", "xmm5", "xmm6", "xmm2", "xmm3", "xmm0", "xmm1");
+ ROUND("2", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2");
+ ROUND("2", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3");
+ ROUND("2", "xmm6", "xmm5", "xmm1", "xmm2", "xmm3", "xmm0");
+ ROUND("2", "xmm5", "xmm6", "xmm2", "xmm3", "xmm0", "xmm1");
+ ROUND("3", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2");
+ ROUND("3", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3");
+
+ if (--nblks == 0)
+ break;
+
+ /* Round 68..71 */
+ asm volatile ("movdqu 0(%[data]), %%xmm0\n\t"
+ "sha1nexte %%xmm1, %%xmm6\n\t"
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "sha1msg2 %%xmm1, %%xmm2\n\t"
+ "sha1rnds4 $3, %%xmm6, %%xmm4\n\t"
+ "pxor %%xmm1, %%xmm3\n\t"
+ "pshufb %%xmm7, %%xmm0\n\t"
+ :
+ : [data] "r" (data)
+ : "memory" );
+
+ /* Round 72..75 */
+ asm volatile ("movdqu 16(%[data]), %%xmm1\n\t"
+ "sha1nexte %%xmm2, %%xmm5\n\t"
+ "movdqa %%xmm4, %%xmm6\n\t"
+ "sha1msg2 %%xmm2, %%xmm3\n\t"
+ "sha1rnds4 $3, %%xmm5, %%xmm4\n\t"
+ "pshufb %%xmm7, %%xmm1\n\t"
+ :
+ : [data] "r" (data)
+ : "memory" );
+
+ /* Round 76..79 */
+ asm volatile ("movdqu 32(%[data]), %%xmm2\n\t"
+ "sha1nexte %%xmm3, %%xmm6\n\t"
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "sha1rnds4 $3, %%xmm6, %%xmm4\n\t"
+ "pshufb %%xmm7, %%xmm2\n\t"
+ :
+ : [data] "r" (data)
+ : "memory" );
+
+ /* Merge states, store current. */
+ asm volatile ("movdqu 48(%[data]), %%xmm3\n\t"
+ "sha1nexte (%[e_save]), %%xmm5\n\t"
+ "paddd (%[abcd_save]), %%xmm4\n\t"
+ "pshufb %%xmm7, %%xmm3\n\t"
+ "movdqa %%xmm5, (%[e_save])\n\t"
+ "movdqa %%xmm4, (%[abcd_save])\n\t"
+ :
+ : [abcd_save] "r" (abcd_save), [e_save] "r" (e_save),
+ [data] "r" (data)
+ : "memory" );
+
+ data += 64;
+ }
+
+ /* Round 68..71 */
+ asm volatile ("sha1nexte %%xmm1, %%xmm6\n\t"
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "sha1msg2 %%xmm1, %%xmm2\n\t"
+ "sha1rnds4 $3, %%xmm6, %%xmm4\n\t"
+ "pxor %%xmm1, %%xmm3\n\t"
+ ::: "memory" );
+
+ /* Round 72..75 */
+ asm volatile ("sha1nexte %%xmm2, %%xmm5\n\t"
+ "movdqa %%xmm4, %%xmm6\n\t"
+ "sha1msg2 %%xmm2, %%xmm3\n\t"
+ "sha1rnds4 $3, %%xmm5, %%xmm4\n\t"
+ ::: "memory" );
+
+ /* Round 76..79 */
+ asm volatile ("sha1nexte %%xmm3, %%xmm6\n\t"
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "sha1rnds4 $3, %%xmm6, %%xmm4\n\t"
+ ::: "memory" );
+
+ /* Merge states. */
+ asm volatile ("sha1nexte (%[e_save]), %%xmm5\n\t"
+ "paddd (%[abcd_save]), %%xmm4\n\t"
+ :
+ : [abcd_save] "r" (abcd_save), [e_save] "r" (e_save)
+ : "memory" );
+
+ /* Save state */
+ asm volatile ("pshufd $0x1b, %%xmm4, %%xmm4\n\t"
+ "psrldq $12, %%xmm5\n\t"
+ "movdqu %%xmm4, (%[state])\n\t"
+ "movd %%xmm5, 16(%[state])\n\t"
+ :
+ : [state] "r" (state)
+ : "memory" );
+
+ shaext_cleanup (abcd_save, e_save);
+ return 0;
+}
+
+#if __clang__
+# pragma clang attribute pop
+#endif
+
+#endif /* HAVE_GCC_INLINE_ASM_SHAEXT */
diff --git a/comm/third_party/libgcrypt/cipher/sha1-ssse3-amd64.S b/comm/third_party/libgcrypt/cipher/sha1-ssse3-amd64.S
new file mode 100644
index 0000000000..db62928ad3
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha1-ssse3-amd64.S
@@ -0,0 +1,437 @@
+/* sha1-ssse3-amd64.S - Intel SSSE3 accelerated SHA-1 transform function
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Based on sha1.c:
+ * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Intel SSSE3 accelerated SHA-1 implementation based on white paper:
+ * "Improving the Performance of the Secure Hash Algorithm (SHA-1)"
+ * http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
+ */
+
+#ifdef __x86_64__
+#include <config.h>
+
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA1)
+
+#include "asm-common-amd64.h"
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+.text
+#define K1 0x5A827999
+#define K2 0x6ED9EBA1
+#define K3 0x8F1BBCDC
+#define K4 0xCA62C1D6
+.align 16
+.LK_XMM:
+.LK1: .long K1, K1, K1, K1
+.LK2: .long K2, K2, K2, K2
+.LK3: .long K3, K3, K3, K3
+.LK4: .long K4, K4, K4, K4
+
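+/* pshufb control mask that byte-swaps each 32-bit lane, used to convert
+ * the big-endian message words to host byte order on load. */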
+.Lbswap_shufb_ctl:
+ .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+
+
+/* Register macros */
+
+#define RSTATE %r8
+#define RDATA %r9
+#define ROLDSTACK %r10
+#define RNBLKS %r11
+
+#define a %eax
+#define b %ebx
+#define c %ecx
+#define d %edx
+#define e %edi
+
+#define RT0 %esi
+#define RT1 %ebp
+
+#define Wtmp0 %xmm0
+#define Wtmp1 %xmm1
+
+#define W0 %xmm2
+#define W1 %xmm3
+#define W2 %xmm4
+#define W3 %xmm5
+#define W4 %xmm6
+#define W5 %xmm7
+#define W6 %xmm8
+#define W7 %xmm9
+
+#define BSWAP_REG %xmm10
+
+
+/* Round function macros. */
+
+#define WK(i) (((i) & 15) * 4)(%rsp)
+
+#define R_F1(a,b,c,d,e,i) \
+ movl c, RT0; \
+ addl WK(i), e; \
+ xorl d, RT0; \
+ movl a, RT1; \
+ andl b, RT0; \
+ roll $30, b; \
+ xorl d, RT0; \
+ leal (RT0,e), e; \
+ roll $5, RT1; \
+ addl RT1, e;
+
+#define R_F2(a,b,c,d,e,i) \
+ movl c, RT0; \
+ addl WK(i), e; \
+ xorl b, RT0; \
+ roll $30, b; \
+ xorl d, RT0; \
+ movl a, RT1; \
+ leal (RT0,e), e; \
+ roll $5, RT1; \
+ addl RT1, e;
+
+#define R_F3(a,b,c,d,e,i) \
+ movl c, RT0; \
+ movl b, RT1; \
+ xorl b, RT0; \
+ andl c, RT1; \
+ andl d, RT0; \
+ addl RT1, e; \
+ addl WK(i), e; \
+ roll $30, b; \
+ movl a, RT1; \
+ leal (RT0,e), e; \
+ roll $5, RT1; \
+ addl RT1, e;
+
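+/* SHA-1 uses the same parity function (x ^ y ^ z) for rounds 20..39 and
+ * 60..79, so the F4 round body simply reuses F2. */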
+#define R_F4(a,b,c,d,e,i) R_F2(a,b,c,d,e,i)
+
+#define R(a,b,c,d,e,f,i) \
+ R_##f(a,b,c,d,e,i)
+
+
+/* Input expansion macros. */
+
+#define W_PRECALC_00_15_0(i, W, tmp0) \
+ movdqu (4*(i))(RDATA), tmp0;
+
+#define W_PRECALC_00_15_1(i, W, tmp0) \
+ pshufb BSWAP_REG, tmp0; \
+ movdqa tmp0, W;
+
+#define W_PRECALC_00_15_2(i, W, tmp0) \
+ paddd (.LK_XMM + ((i)/20)*16) rRIP, tmp0;
+
+#define W_PRECALC_00_15_3(i, W, tmp0) \
+ movdqa tmp0, WK(i&~3);
+
+#define W_PRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ movdqa W_m12, W; \
+ palignr $8, W_m16, W; \
+ movdqa W_m04, tmp0; \
+ psrldq $4, tmp0; \
+ pxor W_m08, W;
+
+#define W_PRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ pxor W_m16, tmp0; \
+ pxor tmp0, W; \
+ movdqa W, tmp1; \
+ movdqa W, tmp0; \
+ pslldq $12, tmp1;
+
+#define W_PRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ psrld $31, W; \
+ pslld $1, tmp0; \
+ por W, tmp0; \
+ movdqa tmp1, W; \
+ psrld $30, tmp1; \
+ pslld $2, W;
+
+#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+ pxor W, tmp0; \
+ pxor tmp1, tmp0; \
+ movdqa tmp0, W; \
+ paddd (.LK_XMM + ((i)/20)*16) rRIP, tmp0; \
+ movdqa tmp0, WK((i)&~3);
+
+#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ movdqa W_m04, tmp0; \
+ pxor W_m28, W; \
+ palignr $8, W_m08, tmp0;
+
+#define W_PRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ pxor W_m16, W; \
+ pxor tmp0, W; \
+ movdqa W, tmp0;
+
+#define W_PRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ psrld $30, W; \
+ pslld $2, tmp0; \
+ por W, tmp0;
+
+#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+ movdqa tmp0, W; \
+ paddd (.LK_XMM + ((i)/20)*16) rRIP, tmp0; \
+ movdqa tmp0, WK((i)&~3);
+
+#define CLEAR_REG(reg) pxor reg, reg;
+
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * _gcry_sha1_transform_amd64_ssse3 (void *ctx, const unsigned char *data,
+ * size_t nblks)
+ */
+.globl _gcry_sha1_transform_amd64_ssse3
+ELF(.type _gcry_sha1_transform_amd64_ssse3,@function)
+.align 16
+_gcry_sha1_transform_amd64_ssse3:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: data (64*nblks bytes)
+ * %rdx: nblks
+ */
+ CFI_STARTPROC();
+
+ xorl %eax, %eax;
+ cmpq $0, %rdx;
+ jz .Lret;
+
+ movq %rdx, RNBLKS;
+ movq %rdi, RSTATE;
+ movq %rsi, RDATA;
+ pushq %rbx;
+ CFI_PUSH(%rbx);
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+
+ movq %rsp, ROLDSTACK;
+ CFI_DEF_CFA_REGISTER(ROLDSTACK);
+
+ subq $(16*4), %rsp;
+ andq $(~31), %rsp;
+
+ /* Get the values of the chaining variables. */
+ movl state_h0(RSTATE), a;
+ movl state_h1(RSTATE), b;
+ movl state_h2(RSTATE), c;
+ movl state_h3(RSTATE), d;
+ movl state_h4(RSTATE), e;
+
+ movdqa .Lbswap_shufb_ctl rRIP, BSWAP_REG;
+
+ /* Precalc 0-15. */
+ W_PRECALC_00_15_0(0, W0, Wtmp0);
+ W_PRECALC_00_15_1(1, W0, Wtmp0);
+ W_PRECALC_00_15_2(2, W0, Wtmp0);
+ W_PRECALC_00_15_3(3, W0, Wtmp0);
+ W_PRECALC_00_15_0(4, W7, Wtmp0);
+ W_PRECALC_00_15_1(5, W7, Wtmp0);
+ W_PRECALC_00_15_2(6, W7, Wtmp0);
+ W_PRECALC_00_15_3(7, W7, Wtmp0);
+ W_PRECALC_00_15_0(8, W6, Wtmp0);
+ W_PRECALC_00_15_1(9, W6, Wtmp0);
+ W_PRECALC_00_15_2(10, W6, Wtmp0);
+ W_PRECALC_00_15_3(11, W6, Wtmp0);
+ W_PRECALC_00_15_0(12, W5, Wtmp0);
+ W_PRECALC_00_15_1(13, W5, Wtmp0);
+ W_PRECALC_00_15_2(14, W5, Wtmp0);
+ W_PRECALC_00_15_3(15, W5, Wtmp0);
+
+.align 8
+.Loop:
+ addq $64, RDATA;
+
+ /* Transform 0-15 + Precalc 16-31. */
+ R( a, b, c, d, e, F1, 0 ); W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( e, a, b, c, d, F1, 1 ); W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( d, e, a, b, c, F1, 2 ); W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( c, d, e, a, b, F1, 3 ); W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+ R( b, c, d, e, a, F1, 4 ); W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( a, b, c, d, e, F1, 5 ); W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( e, a, b, c, d, F1, 6 ); W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( d, e, a, b, c, F1, 7 ); W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+ R( c, d, e, a, b, F1, 8 ); W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( b, c, d, e, a, F1, 9 ); W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( a, b, c, d, e, F1, 10 ); W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( e, a, b, c, d, F1, 11 ); W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+ R( d, e, a, b, c, F1, 12 ); W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ R( c, d, e, a, b, F1, 13 ); W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ R( b, c, d, e, a, F1, 14 ); W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+ R( a, b, c, d, e, F1, 15 ); W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+
+ /* Transform 16-63 + Precalc 32-79. */
+ R( e, a, b, c, d, F1, 16 ); W_PRECALC_32_79_0(32, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( d, e, a, b, c, F1, 17 ); W_PRECALC_32_79_1(33, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( c, d, e, a, b, F1, 18 ); W_PRECALC_32_79_2(34, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( b, c, d, e, a, F1, 19 ); W_PRECALC_32_79_3(35, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( a, b, c, d, e, F2, 20 ); W_PRECALC_32_79_0(36, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( e, a, b, c, d, F2, 21 ); W_PRECALC_32_79_1(37, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( d, e, a, b, c, F2, 22 ); W_PRECALC_32_79_2(38, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( c, d, e, a, b, F2, 23 ); W_PRECALC_32_79_3(39, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( b, c, d, e, a, F2, 24 ); W_PRECALC_32_79_0(40, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( a, b, c, d, e, F2, 25 ); W_PRECALC_32_79_1(41, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( e, a, b, c, d, F2, 26 ); W_PRECALC_32_79_2(42, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( d, e, a, b, c, F2, 27 ); W_PRECALC_32_79_3(43, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( c, d, e, a, b, F2, 28 ); W_PRECALC_32_79_0(44, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( b, c, d, e, a, F2, 29 ); W_PRECALC_32_79_1(45, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( a, b, c, d, e, F2, 30 ); W_PRECALC_32_79_2(46, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( e, a, b, c, d, F2, 31 ); W_PRECALC_32_79_3(47, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( d, e, a, b, c, F2, 32 ); W_PRECALC_32_79_0(48, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( c, d, e, a, b, F2, 33 ); W_PRECALC_32_79_1(49, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( b, c, d, e, a, F2, 34 ); W_PRECALC_32_79_2(50, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( a, b, c, d, e, F2, 35 ); W_PRECALC_32_79_3(51, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+ R( e, a, b, c, d, F2, 36 ); W_PRECALC_32_79_0(52, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( d, e, a, b, c, F2, 37 ); W_PRECALC_32_79_1(53, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( c, d, e, a, b, F2, 38 ); W_PRECALC_32_79_2(54, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( b, c, d, e, a, F2, 39 ); W_PRECALC_32_79_3(55, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+ R( a, b, c, d, e, F3, 40 ); W_PRECALC_32_79_0(56, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( e, a, b, c, d, F3, 41 ); W_PRECALC_32_79_1(57, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( d, e, a, b, c, F3, 42 ); W_PRECALC_32_79_2(58, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( c, d, e, a, b, F3, 43 ); W_PRECALC_32_79_3(59, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+ R( b, c, d, e, a, F3, 44 ); W_PRECALC_32_79_0(60, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( a, b, c, d, e, F3, 45 ); W_PRECALC_32_79_1(61, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( e, a, b, c, d, F3, 46 ); W_PRECALC_32_79_2(62, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( d, e, a, b, c, F3, 47 ); W_PRECALC_32_79_3(63, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+ R( c, d, e, a, b, F3, 48 ); W_PRECALC_32_79_0(64, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( b, c, d, e, a, F3, 49 ); W_PRECALC_32_79_1(65, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( a, b, c, d, e, F3, 50 ); W_PRECALC_32_79_2(66, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( e, a, b, c, d, F3, 51 ); W_PRECALC_32_79_3(67, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+ R( d, e, a, b, c, F3, 52 ); W_PRECALC_32_79_0(68, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( c, d, e, a, b, F3, 53 ); W_PRECALC_32_79_1(69, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( b, c, d, e, a, F3, 54 ); W_PRECALC_32_79_2(70, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( a, b, c, d, e, F3, 55 ); W_PRECALC_32_79_3(71, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+ R( e, a, b, c, d, F3, 56 ); W_PRECALC_32_79_0(72, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( d, e, a, b, c, F3, 57 ); W_PRECALC_32_79_1(73, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( c, d, e, a, b, F3, 58 ); W_PRECALC_32_79_2(74, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( b, c, d, e, a, F3, 59 ); W_PRECALC_32_79_3(75, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+ R( a, b, c, d, e, F4, 60 ); W_PRECALC_32_79_0(76, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( e, a, b, c, d, F4, 61 ); W_PRECALC_32_79_1(77, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( d, e, a, b, c, F4, 62 ); W_PRECALC_32_79_2(78, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+ R( c, d, e, a, b, F4, 63 ); W_PRECALC_32_79_3(79, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+
+ decq RNBLKS;
+ jz .Lend;
+
+ /* Transform 64-79 + Precalc 0-15 of next block. */
+ R( b, c, d, e, a, F4, 64 ); W_PRECALC_00_15_0(0, W0, Wtmp0);
+ R( a, b, c, d, e, F4, 65 ); W_PRECALC_00_15_1(1, W0, Wtmp0);
+ R( e, a, b, c, d, F4, 66 ); W_PRECALC_00_15_2(2, W0, Wtmp0);
+ R( d, e, a, b, c, F4, 67 ); W_PRECALC_00_15_3(3, W0, Wtmp0);
+ R( c, d, e, a, b, F4, 68 ); W_PRECALC_00_15_0(4, W7, Wtmp0);
+ R( b, c, d, e, a, F4, 69 ); W_PRECALC_00_15_1(5, W7, Wtmp0);
+ R( a, b, c, d, e, F4, 70 ); W_PRECALC_00_15_2(6, W7, Wtmp0);
+ R( e, a, b, c, d, F4, 71 ); W_PRECALC_00_15_3(7, W7, Wtmp0);
+ R( d, e, a, b, c, F4, 72 ); W_PRECALC_00_15_0(8, W6, Wtmp0);
+ R( c, d, e, a, b, F4, 73 ); W_PRECALC_00_15_1(9, W6, Wtmp0);
+ R( b, c, d, e, a, F4, 74 ); W_PRECALC_00_15_2(10, W6, Wtmp0);
+ R( a, b, c, d, e, F4, 75 ); W_PRECALC_00_15_3(11, W6, Wtmp0);
+ R( e, a, b, c, d, F4, 76 ); W_PRECALC_00_15_0(12, W5, Wtmp0);
+ R( d, e, a, b, c, F4, 77 ); W_PRECALC_00_15_1(13, W5, Wtmp0);
+ R( c, d, e, a, b, F4, 78 );
+ addl state_h0(RSTATE), a; W_PRECALC_00_15_2(14, W5, Wtmp0);
+ R( b, c, d, e, a, F4, 79 ); W_PRECALC_00_15_3(15, W5, Wtmp0);
+
+ /* Update the chaining variables. */
+ addl state_h3(RSTATE), d;
+ addl state_h2(RSTATE), c;
+ addl state_h1(RSTATE), b;
+ addl state_h4(RSTATE), e;
+
+ movl d, state_h3(RSTATE);
+ movl c, state_h2(RSTATE);
+ movl b, state_h1(RSTATE);
+ movl a, state_h0(RSTATE);
+ movl e, state_h4(RSTATE);
+
+ jmp .Loop;
+
+.align 16
+.Lend:
+ /* Transform 64-79 + Clear XMM registers + Burn stack. */
+ R( b, c, d, e, a, F4, 64 ); CLEAR_REG(BSWAP_REG);
+ R( a, b, c, d, e, F4, 65 ); CLEAR_REG(Wtmp0);
+ R( e, a, b, c, d, F4, 66 ); CLEAR_REG(Wtmp1);
+ R( d, e, a, b, c, F4, 67 ); CLEAR_REG(W0);
+ R( c, d, e, a, b, F4, 68 ); CLEAR_REG(W1);
+ R( b, c, d, e, a, F4, 69 ); CLEAR_REG(W2);
+ R( a, b, c, d, e, F4, 70 ); CLEAR_REG(W3);
+ R( e, a, b, c, d, F4, 71 ); CLEAR_REG(W4);
+ R( d, e, a, b, c, F4, 72 ); CLEAR_REG(W5);
+ R( c, d, e, a, b, F4, 73 ); CLEAR_REG(W6);
+ R( b, c, d, e, a, F4, 74 ); CLEAR_REG(W7);
+ R( a, b, c, d, e, F4, 75 );
+ R( e, a, b, c, d, F4, 76 ); movdqa Wtmp0, (0*16)(%rsp);
+ R( d, e, a, b, c, F4, 77 ); movdqa Wtmp0, (1*16)(%rsp);
+ R( c, d, e, a, b, F4, 78 ); movdqa Wtmp0, (2*16)(%rsp);
+ addl state_h0(RSTATE), a;
+ R( b, c, d, e, a, F4, 79 );
+
+ /* 16*4/16-1 = 3 */
+ movdqa Wtmp0, (3*16)(%rsp);
+
+ /* Update the chaining variables. */
+ addl state_h3(RSTATE), d;
+ addl state_h2(RSTATE), c;
+ addl state_h1(RSTATE), b;
+ addl state_h4(RSTATE), e;
+
+ movl d, state_h3(RSTATE);
+ movl c, state_h2(RSTATE);
+ movl b, state_h1(RSTATE);
+ movl a, state_h0(RSTATE);
+ movl e, state_h4(RSTATE);
+
+ movq ROLDSTACK, %rsp;
+ CFI_REGISTER(ROLDSTACK, %rsp);
+ CFI_DEF_CFA_REGISTER(%rsp);
+
+ popq %rbp;
+ CFI_POP(%rbp);
+ popq %rbx;
+ CFI_POP(%rbx);
+
+ /* stack already burned */
+ xorl %eax, %eax;
+
+.Lret:
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sha1_transform_amd64_ssse3,
+ .-_gcry_sha1_transform_amd64_ssse3;)
+
+#endif
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/sha1.c b/comm/third_party/libgcrypt/cipher/sha1.c
new file mode 100644
index 0000000000..35f7376c19
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha1.c
@@ -0,0 +1,765 @@
+/* sha1.c - SHA1 hash function
+ * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/* Test vectors:
+ *
+ * "abc"
+ * A999 3E36 4706 816A BA3E 2571 7850 C26C 9CD0 D89D
+ *
+ * "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+ * 8498 3E44 1C3B D26E BAAE 4AA1 F951 29E5 E546 70F1
+ */
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "sha1.h"
+
+
+/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
+#undef USE_SSSE3
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSSE3 1
+#endif
+
+/* USE_AVX indicates whether to compile with Intel AVX code. */
+#undef USE_AVX
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX 1
+#endif
+
+/* USE_BMI2 indicates whether to compile with Intel AVX/BMI2 code. */
+#undef USE_BMI2
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+ defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_BMI2 1
+#endif
+
+/* USE_AVX2 indicates whether to compile with Intel AVX2/BMI2 code. */
+#undef USE_AVX2
+#if defined(USE_BMI2) && defined(HAVE_GCC_INLINE_ASM_AVX2)
+# define USE_AVX2 1
+#endif
+
+/* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */
+#undef USE_SHAEXT
+#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
+ defined(HAVE_GCC_INLINE_ASM_SSE41) && \
+ defined(ENABLE_SHAEXT_SUPPORT)
+# define USE_SHAEXT 1
+#endif
+
+/* USE_NEON indicates whether to enable ARM NEON assembly code. */
+#undef USE_NEON
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_NEON)
+# define USE_NEON 1
+# endif
+#endif
+
+/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
+ * code. */
+#undef USE_ARM_CE
+#ifdef ENABLE_ARM_CRYPTO_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+# define USE_ARM_CE 1
+# elif defined(__AARCH64EL__) \
+ && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+# define USE_ARM_CE 1
+# endif
+#endif
+
+
+/* A macro to test whether P is properly aligned for a u32 type.
+ Note that config.h provides a suitable replacement for uintptr_t if
+ it does not exist in stdint.h. */
+/* #if __GNUC__ >= 2 */
+/* # define U32_ALIGNED_P(p) (!(((uintptr_t)p) % __alignof__ (u32))) */
+/* #else */
+/* # define U32_ALIGNED_P(p) (!(((uintptr_t)p) % sizeof (u32))) */
+/* #endif */
+
+
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2) || \
+ defined(USE_SHAEXT)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16 + sizeof(void *) * 4)
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
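+
+/* The wrapper functions below add ASM_EXTRA_STACK to the burn-depth value
+ * returned by the assembly implementations, to account for the extra
+ * ABI-conversion stack used on Win64. */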
+
+
+#ifdef USE_SSSE3
+unsigned int
+_gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data,
+ size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_ssse3 (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_AVX
+unsigned int
+_gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data,
+ size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_avx (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_BMI2
+unsigned int
+_gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data,
+ size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_avx_bmi2 (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks)
+ + ASM_EXTRA_STACK;
+}
+
+#ifdef USE_AVX2
+unsigned int
+_gcry_sha1_transform_amd64_avx2_bmi2 (void *state, const unsigned char *data,
+ size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_avx2_bmi2 (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+
+  /* The AVX2/BMI2 function only handles pairs of blocks, so nblks needs to
+   * be a multiple of 2, and it does not handle zero nblks.  Use the
+   * AVX/BMI2 code to handle these cases. */
+
+ if (nblks <= 1)
+ return do_sha1_transform_amd64_avx_bmi2 (ctx, data, nblks);
+
+ if (nblks & 1)
+ {
+ (void)_gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, 1);
+ nblks--;
+ data += 64;
+ }
+
+ return _gcry_sha1_transform_amd64_avx2_bmi2 (&hd->h0, data, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif /* USE_AVX2 */
+#endif /* USE_BMI2 */
+
+#ifdef USE_SHAEXT
+/* Does not need ASM_FUNC_ABI */
+unsigned int
+_gcry_sha1_transform_intel_shaext (void *state, const unsigned char *data,
+ size_t nblks);
+
+static unsigned int
+do_sha1_transform_intel_shaext (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_intel_shaext (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef USE_NEON
+unsigned int
+_gcry_sha1_transform_armv7_neon (void *state, const unsigned char *data,
+ size_t nblks);
+
+static unsigned int
+do_sha1_transform_armv7_neon (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_armv7_neon (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef USE_ARM_CE
+unsigned int
+_gcry_sha1_transform_armv8_ce (void *state, const unsigned char *data,
+ size_t nblks);
+
+static unsigned int
+do_sha1_transform_armv8_ce (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_armv8_ce (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef SHA1_USE_S390X_CRYPTO
+#include "asm-inline-s390x.h"
+
+static unsigned int
+do_sha1_transform_s390x (void *ctx, const unsigned char *data, size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+
+ kimd_execute (KMID_FUNCTION_SHA1, &hd->h0, data, nblks * 64);
+ return 0;
+}
+
+static unsigned int
+do_sha1_final_s390x (void *ctx, const unsigned char *data, size_t datalen,
+ u32 len_msb, u32 len_lsb)
+{
+ SHA1_CONTEXT *hd = ctx;
+
+  /* Make sure that 'final_len' is positioned at the correct offset relative
+   * to 'h0'.  This is because we are passing the 'h0' pointer as the start
+   * of the parameter block to the 'klmd' instruction. */
+
+ gcry_assert (offsetof (SHA1_CONTEXT, final_len_msb)
+ - offsetof (SHA1_CONTEXT, h0) == 5 * sizeof(u32));
+ gcry_assert (offsetof (SHA1_CONTEXT, final_len_lsb)
+ - offsetof (SHA1_CONTEXT, final_len_msb) == 1 * sizeof(u32));
+
+ hd->final_len_msb = len_msb;
+ hd->final_len_lsb = len_lsb;
+
+ klmd_execute (KMID_FUNCTION_SHA1, &hd->h0, data, datalen);
+ return 0;
+}
+#endif
+
+
+static unsigned int
+do_transform_generic (void *c, const unsigned char *data, size_t nblks);
+
+
+static void
+sha1_init (void *context, unsigned int flags)
+{
+ SHA1_CONTEXT *hd = context;
+ unsigned int features = _gcry_get_hw_features ();
+
+ (void)flags;
+
+ hd->h0 = 0x67452301;
+ hd->h1 = 0xefcdab89;
+ hd->h2 = 0x98badcfe;
+ hd->h3 = 0x10325476;
+ hd->h4 = 0xc3d2e1f0;
+
+ hd->bctx.nblocks = 0;
+ hd->bctx.nblocks_high = 0;
+ hd->bctx.count = 0;
+ hd->bctx.blocksize_shift = _gcry_ctz(64);
+
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ hd->bctx.bwrite = do_transform_generic;
+#ifdef USE_SSSE3
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ hd->bctx.bwrite = do_sha1_transform_amd64_ssse3;
+#endif
+#ifdef USE_AVX
+ /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
+ * Therefore use this implementation on Intel CPUs only. */
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ hd->bctx.bwrite = do_sha1_transform_amd64_avx;
+#endif
+#ifdef USE_BMI2
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2))
+ hd->bctx.bwrite = do_sha1_transform_amd64_avx_bmi2;
+#endif
+#ifdef USE_AVX2
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_AVX) &&
+ (features & HWF_INTEL_BMI2))
+ hd->bctx.bwrite = do_sha1_transform_amd64_avx2_bmi2;
+#endif
+#ifdef USE_SHAEXT
+ if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
+ hd->bctx.bwrite = do_sha1_transform_intel_shaext;
+#endif
+#ifdef USE_NEON
+ if ((features & HWF_ARM_NEON) != 0)
+ hd->bctx.bwrite = do_sha1_transform_armv7_neon;
+#endif
+#ifdef USE_ARM_CE
+ if ((features & HWF_ARM_SHA1) != 0)
+ hd->bctx.bwrite = do_sha1_transform_armv8_ce;
+#endif
+#ifdef SHA1_USE_S390X_CRYPTO
+ hd->use_s390x_crypto = 0;
+ if ((features & HWF_S390X_MSA) != 0)
+ {
+ if ((kimd_query () & km_function_to_mask (KMID_FUNCTION_SHA1)) &&
+ (klmd_query () & km_function_to_mask (KMID_FUNCTION_SHA1)))
+ {
+ hd->bctx.bwrite = do_sha1_transform_s390x;
+ hd->use_s390x_crypto = 1;
+ }
+ }
+#endif
+
+ (void)features;
+}
+
+/*
+ * Initialize the context HD. This is used to prepare the use of
+ * _gcry_sha1_mixblock. WARNING: This is a special purpose function
+ * for exclusive use by random-csprng.c.
+ */
+void
+_gcry_sha1_mixblock_init (SHA1_CONTEXT *hd)
+{
+ sha1_init (hd, 0);
+}
+
+
+/* Round function macros. */
+#define K1 0x5A827999L
+#define K2 0x6ED9EBA1L
+#define K3 0x8F1BBCDCL
+#define K4 0xCA62C1D6L
+#define F1(x,y,z) ( z ^ ( x & ( y ^ z ) ) )
+#define F2(x,y,z) ( x ^ y ^ z )
+#define F3(x,y,z) ( ( x & y ) | ( z & ( x | y ) ) )
+#define F4(x,y,z) ( x ^ y ^ z )
+#define M(i) ( tm = x[ i &0x0f] \
+ ^ x[(i-14)&0x0f] \
+ ^ x[(i-8) &0x0f] \
+ ^ x[(i-3) &0x0f], \
+ (x[i&0x0f] = rol(tm, 1)))
+#define R(a,b,c,d,e,f,k,m) do { e += rol( a, 5 ) \
+ + f( b, c, d ) \
+ + k \
+ + m; \
+ b = rol( b, 30 ); \
+ } while(0)
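+
+/* These macros follow the SHA-1 specification (FIPS 180-4): F1 is the
+ * "Ch" function, F2 and F4 the parity function, F3 the "Maj" function;
+ * M(i) computes the message schedule word
+ *   W[i] = rol(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1)
+ * in a 16-word circular buffer, and R performs one round:
+ *   e += rol(a,5) + f(b,c,d) + k + w;  b = rol(b,30);
+ * with the roles of a..e rotating between rounds. */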
+
+/*
+ * Transform NBLOCKS of each 64 bytes (16 32-bit words) at DATA.
+ */
+static unsigned int
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+
+ do
+ {
+ const u32 *idata = (const void *)data;
+ u32 a, b, c, d, e; /* Local copies of the chaining variables. */
+ u32 tm; /* Helper. */
+ u32 x[16]; /* The array we work on. */
+
+#define I(i) (x[i] = buf_get_be32(idata + i))
+
+ /* Get the values of the chaining variables. */
+ a = hd->h0;
+ b = hd->h1;
+ c = hd->h2;
+ d = hd->h3;
+ e = hd->h4;
+
+ /* Transform. */
+ R( a, b, c, d, e, F1, K1, I( 0) );
+ R( e, a, b, c, d, F1, K1, I( 1) );
+ R( d, e, a, b, c, F1, K1, I( 2) );
+ R( c, d, e, a, b, F1, K1, I( 3) );
+ R( b, c, d, e, a, F1, K1, I( 4) );
+ R( a, b, c, d, e, F1, K1, I( 5) );
+ R( e, a, b, c, d, F1, K1, I( 6) );
+ R( d, e, a, b, c, F1, K1, I( 7) );
+ R( c, d, e, a, b, F1, K1, I( 8) );
+ R( b, c, d, e, a, F1, K1, I( 9) );
+ R( a, b, c, d, e, F1, K1, I(10) );
+ R( e, a, b, c, d, F1, K1, I(11) );
+ R( d, e, a, b, c, F1, K1, I(12) );
+ R( c, d, e, a, b, F1, K1, I(13) );
+ R( b, c, d, e, a, F1, K1, I(14) );
+ R( a, b, c, d, e, F1, K1, I(15) );
+ R( e, a, b, c, d, F1, K1, M(16) );
+ R( d, e, a, b, c, F1, K1, M(17) );
+ R( c, d, e, a, b, F1, K1, M(18) );
+ R( b, c, d, e, a, F1, K1, M(19) );
+ R( a, b, c, d, e, F2, K2, M(20) );
+ R( e, a, b, c, d, F2, K2, M(21) );
+ R( d, e, a, b, c, F2, K2, M(22) );
+ R( c, d, e, a, b, F2, K2, M(23) );
+ R( b, c, d, e, a, F2, K2, M(24) );
+ R( a, b, c, d, e, F2, K2, M(25) );
+ R( e, a, b, c, d, F2, K2, M(26) );
+ R( d, e, a, b, c, F2, K2, M(27) );
+ R( c, d, e, a, b, F2, K2, M(28) );
+ R( b, c, d, e, a, F2, K2, M(29) );
+ R( a, b, c, d, e, F2, K2, M(30) );
+ R( e, a, b, c, d, F2, K2, M(31) );
+ R( d, e, a, b, c, F2, K2, M(32) );
+ R( c, d, e, a, b, F2, K2, M(33) );
+ R( b, c, d, e, a, F2, K2, M(34) );
+ R( a, b, c, d, e, F2, K2, M(35) );
+ R( e, a, b, c, d, F2, K2, M(36) );
+ R( d, e, a, b, c, F2, K2, M(37) );
+ R( c, d, e, a, b, F2, K2, M(38) );
+ R( b, c, d, e, a, F2, K2, M(39) );
+ R( a, b, c, d, e, F3, K3, M(40) );
+ R( e, a, b, c, d, F3, K3, M(41) );
+ R( d, e, a, b, c, F3, K3, M(42) );
+ R( c, d, e, a, b, F3, K3, M(43) );
+ R( b, c, d, e, a, F3, K3, M(44) );
+ R( a, b, c, d, e, F3, K3, M(45) );
+ R( e, a, b, c, d, F3, K3, M(46) );
+ R( d, e, a, b, c, F3, K3, M(47) );
+ R( c, d, e, a, b, F3, K3, M(48) );
+ R( b, c, d, e, a, F3, K3, M(49) );
+ R( a, b, c, d, e, F3, K3, M(50) );
+ R( e, a, b, c, d, F3, K3, M(51) );
+ R( d, e, a, b, c, F3, K3, M(52) );
+ R( c, d, e, a, b, F3, K3, M(53) );
+ R( b, c, d, e, a, F3, K3, M(54) );
+ R( a, b, c, d, e, F3, K3, M(55) );
+ R( e, a, b, c, d, F3, K3, M(56) );
+ R( d, e, a, b, c, F3, K3, M(57) );
+ R( c, d, e, a, b, F3, K3, M(58) );
+ R( b, c, d, e, a, F3, K3, M(59) );
+ R( a, b, c, d, e, F4, K4, M(60) );
+ R( e, a, b, c, d, F4, K4, M(61) );
+ R( d, e, a, b, c, F4, K4, M(62) );
+ R( c, d, e, a, b, F4, K4, M(63) );
+ R( b, c, d, e, a, F4, K4, M(64) );
+ R( a, b, c, d, e, F4, K4, M(65) );
+ R( e, a, b, c, d, F4, K4, M(66) );
+ R( d, e, a, b, c, F4, K4, M(67) );
+ R( c, d, e, a, b, F4, K4, M(68) );
+ R( b, c, d, e, a, F4, K4, M(69) );
+ R( a, b, c, d, e, F4, K4, M(70) );
+ R( e, a, b, c, d, F4, K4, M(71) );
+ R( d, e, a, b, c, F4, K4, M(72) );
+ R( c, d, e, a, b, F4, K4, M(73) );
+ R( b, c, d, e, a, F4, K4, M(74) );
+ R( a, b, c, d, e, F4, K4, M(75) );
+ R( e, a, b, c, d, F4, K4, M(76) );
+ R( d, e, a, b, c, F4, K4, M(77) );
+ R( c, d, e, a, b, F4, K4, M(78) );
+ R( b, c, d, e, a, F4, K4, M(79) );
+
+ /* Update the chaining variables. */
+ hd->h0 += a;
+ hd->h1 += b;
+ hd->h2 += c;
+ hd->h3 += d;
+ hd->h4 += e;
+
+ data += 64;
+ }
+ while (--nblks);
+
+ return 88+4*sizeof(void*);
+}
+
+
+/*
+ * Apply the SHA-1 transform function on the buffer BLOCKOF64BYTE
+ * which must have a length of 64 bytes.  BLOCKOF64BYTE must be 32-bit
+ * aligned. Updates the 20 bytes in BLOCKOF64BYTE with its mixed
+ * content. Returns the number of bytes which should be burned on the
+ * stack. You need to use _gcry_sha1_mixblock_init to initialize the
+ * context.
+ * WARNING: This is a special purpose function for exclusive use by
+ * random-csprng.c.
+ */
+unsigned int
+_gcry_sha1_mixblock (SHA1_CONTEXT *hd, void *blockof64byte)
+{
+ u32 *p = blockof64byte;
+ unsigned int nburn;
+
+ nburn = (*hd->bctx.bwrite) (hd, blockof64byte, 1);
+ p[0] = hd->h0;
+ p[1] = hd->h1;
+ p[2] = hd->h2;
+ p[3] = hd->h3;
+ p[4] = hd->h4;
+
+ return nburn;
+}
+
+
+/* The routine 'final' terminates the computation and
+ * returns the digest.
+ * The handle is prepared for a new cycle, but adding bytes to the
+ * handle will destroy the returned buffer.
+ * Returns: 20 bytes representing the digest.
+ */
+
+static void
+sha1_final(void *context)
+{
+ SHA1_CONTEXT *hd = context;
+ u32 t, th, msb, lsb;
+ unsigned char *p;
+ unsigned int burn;
+
+ t = hd->bctx.nblocks;
+ if (sizeof t == sizeof hd->bctx.nblocks)
+ th = hd->bctx.nblocks_high;
+ else
+ th = hd->bctx.nblocks >> 32;
+
+ /* multiply by 64 to make a byte count */
+ lsb = t << 6;
+ msb = (th << 6) | (t >> 26);
+ /* add the count */
+ t = lsb;
+ if( (lsb += hd->bctx.count) < t )
+ msb++;
+ /* multiply by 8 to make a bit count */
+ t = lsb;
+ lsb <<= 3;
+ msb <<= 3;
+ msb |= t >> 29;
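+  /* For illustration: one full block plus 3 buffered bytes (nblocks = 1,
+   * count = 3) is 67 message bytes, so the code above yields
+   * lsb = 67 * 8 = 536 bits and msb = 0. */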
+
+ if (0)
+ { }
+#ifdef SHA1_USE_S390X_CRYPTO
+ else if (hd->use_s390x_crypto)
+ {
+ burn = do_sha1_final_s390x (hd, hd->bctx.buf, hd->bctx.count, msb, lsb);
+ }
+#endif
+ else if (hd->bctx.count < 56) /* enough room */
+ {
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
+ if (hd->bctx.count < 56)
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+ /* append the 64 bit count */
+ buf_put_be32(hd->bctx.buf + 56, msb);
+ buf_put_be32(hd->bctx.buf + 60, lsb);
+ burn = (*hd->bctx.bwrite) ( hd, hd->bctx.buf, 1 );
+ }
+ else /* need one extra block */
+ {
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
+ /* fill pad and next block with zeroes */
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+ /* append the 64 bit count */
+ buf_put_be32(hd->bctx.buf + 64 + 56, msb);
+ buf_put_be32(hd->bctx.buf + 64 + 60, lsb);
+ burn = (*hd->bctx.bwrite) ( hd, hd->bctx.buf, 2 );
+ }
+
+ p = hd->bctx.buf;
+#define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0)
+ X(0);
+ X(1);
+ X(2);
+ X(3);
+ X(4);
+#undef X
+
+ hd->bctx.count = 0;
+
+ _gcry_burn_stack (burn);
+}
+
+static unsigned char *
+sha1_read( void *context )
+{
+ SHA1_CONTEXT *hd = context;
+
+ return hd->bctx.buf;
+}
+
+/****************
+ * Shortcut function which puts the hash value of the supplied buffer
+ * into outbuf, which must have a size of 20 bytes.
+ */
+void
+_gcry_sha1_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+ SHA1_CONTEXT hd;
+
+ sha1_init (&hd, 0);
+ _gcry_md_block_write (&hd, buffer, length);
+ sha1_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 20);
+}
+
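+/* For example (illustrative only), hashing the "abc" test vector from the
+ * top of this file:
+ *
+ *   unsigned char digest[20];
+ *   _gcry_sha1_hash_buffer (digest, "abc", 3);
+ *
+ * leaves A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D in digest. */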
+
+/* Variant of the above shortcut function using multiple buffers. */
+void
+_gcry_sha1_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
+{
+ SHA1_CONTEXT hd;
+
+ sha1_init (&hd, 0);
+ for (;iovcnt > 0; iov++, iovcnt--)
+ _gcry_md_block_write (&hd,
+ (const char*)iov[0].data + iov[0].off, iov[0].len);
+ sha1_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 20);
+}
+
+
+
+/*
+ Self-test section.
+ */
+
+
+static gpg_err_code_t
+selftests_sha1 (int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+
+ what = "short string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA1, 0,
+ "abc", 3,
+ "\xA9\x99\x3E\x36\x47\x06\x81\x6A\xBA\x3E"
+ "\x25\x71\x78\x50\xC2\x6C\x9C\xD0\xD8\x9D", 20);
+ if (errtxt)
+ goto failed;
+
+ if (extended)
+ {
+ what = "long string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA1, 0,
+ "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
+ "\x84\x98\x3E\x44\x1C\x3B\xD2\x6E\xBA\xAE"
+ "\x4A\xA1\xF9\x51\x29\xE5\xE5\x46\x70\xF1", 20);
+ if (errtxt)
+ goto failed;
+
+ what = "one million \"a\"";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA1, 1,
+ NULL, 0,
+ "\x34\xAA\x97\x3C\xD4\xC4\xDA\xA4\xF6\x1E"
+ "\xEB\x2B\xDB\xAD\x27\x31\x65\x34\x01\x6F", 20);
+ if (errtxt)
+ goto failed;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("digest", GCRY_MD_SHA1, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Run a full self-test for ALGO and return 0 on success. */
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+ gpg_err_code_t ec;
+
+ switch (algo)
+ {
+ case GCRY_MD_SHA1:
+ ec = selftests_sha1 (extended, report);
+ break;
+ default:
+ ec = GPG_ERR_DIGEST_ALGO;
+ break;
+
+ }
+ return ec;
+}
+
+
+
+
+static unsigned char asn[15] = /* Object ID is 1.3.14.3.2.26 */
+ { 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2b, 0x0e, 0x03,
+ 0x02, 0x1a, 0x05, 0x00, 0x04, 0x14 };
+
+static gcry_md_oid_spec_t oid_spec_sha1[] =
+ {
+ /* iso.member-body.us.rsadsi.pkcs.pkcs-1.5 (sha1WithRSAEncryption) */
+ { "1.2.840.113549.1.1.5" },
+ /* iso.member-body.us.x9-57.x9cm.3 (dsaWithSha1)*/
+ { "1.2.840.10040.4.3" },
+ /* from NIST's OIW (sha1) */
+ { "1.3.14.3.2.26" },
+ /* from NIST OIW (sha-1WithRSAEncryption) */
+ { "1.3.14.3.2.29" },
+ /* iso.member-body.us.ansi-x9-62.signatures.ecdsa-with-sha1 */
+ { "1.2.840.10045.4.1" },
+ { NULL },
+ };
+
+gcry_md_spec_t _gcry_digest_spec_sha1 =
+ {
+ GCRY_MD_SHA1, {0, 1},
+ "SHA1", asn, DIM (asn), oid_spec_sha1, 20,
+ sha1_init, _gcry_md_block_write, sha1_final, sha1_read, NULL,
+ _gcry_sha1_hash_buffer, _gcry_sha1_hash_buffers,
+ sizeof (SHA1_CONTEXT),
+ run_selftests
+ };
diff --git a/comm/third_party/libgcrypt/cipher/sha1.h b/comm/third_party/libgcrypt/cipher/sha1.h
new file mode 100644
index 0000000000..a359765847
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha1.h
@@ -0,0 +1,47 @@
+/* sha1.h - SHA-1 context definition
+ * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef GCRY_SHA1_H
+#define GCRY_SHA1_H
+
+#include "hash-common.h"
+
+
+/* SHA1_USE_S390X_CRYPTO indicates whether to enable zSeries code. */
+#undef SHA1_USE_S390X_CRYPTO
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define SHA1_USE_S390X_CRYPTO 1
+#endif /* SHA1_USE_S390X_CRYPTO */
+
+
+/* We need this here for direct use by random-csprng.c. */
+typedef struct
+{
+ gcry_md_block_ctx_t bctx;
+ u32 h0,h1,h2,h3,h4;
+#ifdef SHA1_USE_S390X_CRYPTO
+ u32 final_len_msb, final_len_lsb; /* needs to be right after h4. */
+ int use_s390x_crypto;
+#endif
+} SHA1_CONTEXT;
+
+
+void _gcry_sha1_mixblock_init (SHA1_CONTEXT *hd);
+unsigned int _gcry_sha1_mixblock (SHA1_CONTEXT *hd, void *blockof64byte);
+
+#endif /*GCRY_SHA1_H*/
diff --git a/comm/third_party/libgcrypt/cipher/sha256-armv8-aarch32-ce.S b/comm/third_party/libgcrypt/cipher/sha256-armv8-aarch32-ce.S
new file mode 100644
index 0000000000..2b17ab1b17
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha256-armv8-aarch32-ce.S
@@ -0,0 +1,231 @@
+/* sha256-armv8-aarch32-ce.S - ARM/CE accelerated SHA-256 transform function
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) && defined(USE_SHA256)
+
+.syntax unified
+.arch armv8-a
+.fpu crypto-neon-fp-armv8
+.arm
+
+.text
+
+#ifdef __PIC__
+# define GET_DATA_POINTER(reg, name, rtmp) \
+ ldr reg, 1f; \
+ ldr rtmp, 2f; \
+ b 3f; \
+ 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+ 2: .word name(GOT); \
+ 3: add reg, pc, reg; \
+ ldr reg, [reg, rtmp];
+#else
+# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+
+/* Constants */
+
+.align 4
+gcry_sha256_aarch32_ce_K:
+.LK:
+ .long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+ .long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+ .long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+ .long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+ .long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+ .long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+ .long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+ .long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+ .long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+ .long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+ .long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+ .long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+ .long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+ .long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+ .long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+ .long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+
+
+/* Register macros */
+
+#define qH0123 q0
+#define qH4567 q1
+
+#define qABCD0 q2
+#define qABCD1 q3
+#define qEFGH q4
+
+#define qT0 q5
+#define qT1 q6
+
+#define qW0 q8
+#define qW1 q9
+#define qW2 q10
+#define qW3 q11
+
+#define qK0 q12
+#define qK1 q13
+#define qK2 q14
+#define qK3 q15
+
+
+/* Round macros */
+
+#define _(...) /*_*/
+
+#define do_loadk(nk0, nk1) vld1.32 {nk0-nk1},[lr]!;
+#define do_add(a, b) vadd.u32 a, a, b;
+#define do_sha256su0(w0, w1) sha256su0.32 w0, w1;
+#define do_sha256su1(w0, w2, w3) sha256su1.32 w0, w2, w3;
+
+#define do_rounds(k, nk0, nk1, w0, w1, w2, w3, loadk_fn, add_fn, su0_fn, su1_fn) \
+ loadk_fn( nk0, nk1 ); \
+ su0_fn( w0, w1 ); \
+ vmov qABCD1, qABCD0; \
+ sha256h.32 qABCD0, qEFGH, k; \
+ sha256h2.32 qEFGH, qABCD1, k; \
+ add_fn( nk0, w2 ); \
+ su1_fn( w0, w2, w3 );
+
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) veor reg, reg;
+
+
+/*
+ * unsigned int
+ * _gcry_sha256_transform_armv8_ce (u32 state[8], const void *input_data,
+ * size_t num_blks)
+ */
+.align 3
+.globl _gcry_sha256_transform_armv8_ce
+.type _gcry_sha256_transform_armv8_ce,%function;
+_gcry_sha256_transform_armv8_ce:
+ /* input:
+ * r0: ctx, CTX
+ * r1: data (64*nblks bytes)
+ * r2: nblks
+ */
+
+ cmp r2, #0;
+ push {r4,lr};
+ beq .Ldo_nothing;
+
+ vpush {q4-q7};
+
+ GET_DATA_POINTER(r4, .LK, lr);
+ mov lr, r4
+
+ vld1.32 {qH0123-qH4567}, [r0] /* load state */
+
+ vld1.8 {qW0-qW1}, [r1]!
+ do_loadk(qK0, qK1)
+ vld1.8 {qW2-qW3}, [r1]!
+ vmov qABCD0, qH0123
+ vmov qEFGH, qH4567
+
+ vrev32.8 qW0, qW0
+ vrev32.8 qW1, qW1
+ vrev32.8 qW2, qW2
+ do_add(qK0, qW0)
+ vrev32.8 qW3, qW3
+ do_add(qK1, qW1)
+
+.Loop:
+ do_rounds(qK0, qK2, qK3, qW0, qW1, qW2, qW3, do_loadk, do_add, do_sha256su0, do_sha256su1)
+ subs r2,r2,#1
+ do_rounds(qK1, qK3, _ , qW1, qW2, qW3, qW0, _ , do_add, do_sha256su0, do_sha256su1)
+ do_rounds(qK2, qK0, qK1, qW2, qW3, qW0, qW1, do_loadk, do_add, do_sha256su0, do_sha256su1)
+ do_rounds(qK3, qK1, _ , qW3, qW0, qW1, qW2, _ , do_add, do_sha256su0, do_sha256su1)
+
+ do_rounds(qK0, qK2, qK3, qW0, qW1, qW2, qW3, do_loadk, do_add, do_sha256su0, do_sha256su1)
+ do_rounds(qK1, qK3, _ , qW1, qW2, qW3, qW0, _ , do_add, do_sha256su0, do_sha256su1)
+ do_rounds(qK2, qK0, qK1, qW2, qW3, qW0, qW1, do_loadk, do_add, do_sha256su0, do_sha256su1)
+ do_rounds(qK3, qK1, _ , qW3, qW0, qW1, qW2, _ , do_add, do_sha256su0, do_sha256su1)
+
+ do_rounds(qK0, qK2, qK3, qW0, qW1, qW2, qW3, do_loadk, do_add, do_sha256su0, do_sha256su1)
+ do_rounds(qK1, qK3, _ , qW1, qW2, qW3, qW0, _ , do_add, do_sha256su0, do_sha256su1)
+ do_rounds(qK2, qK0, qK1, qW2, qW3, qW0, qW1, do_loadk, do_add, do_sha256su0, do_sha256su1)
+ do_rounds(qK3, qK1, _ , qW3, qW0, qW1, qW2, _ , do_add, do_sha256su0, do_sha256su1)
+
+ beq .Lend
+
+ do_rounds(qK0, qK2, qK3, qW0, _ , qW2, qW3, do_loadk, do_add, _, _)
+ vld1.8 {qW0}, [r1]!
+ mov lr, r4
+ do_rounds(qK1, qK3, _ , qW1, _ , qW3, _ , _ , do_add, _, _)
+ vld1.8 {qW1}, [r1]!
+ vrev32.8 qW0, qW0
+ do_rounds(qK2, qK0, qK1, qW2, _ , qW0, _ , do_loadk, do_add, _, _)
+ vrev32.8 qW1, qW1
+ vld1.8 {qW2}, [r1]!
+ do_rounds(qK3, qK1, _ , qW3, _ , qW1, _ , _ , do_add, _, _)
+ vld1.8 {qW3}, [r1]!
+
+ vadd.u32 qH0123, qABCD0
+ vadd.u32 qH4567, qEFGH
+
+ vrev32.8 qW2, qW2
+ vmov qABCD0, qH0123
+ vrev32.8 qW3, qW3
+ vmov qEFGH, qH4567
+
+ b .Loop
+
+.Lend:
+
+ do_rounds(qK0, qK2, qK3, qW0, _ , qW2, qW3, do_loadk, do_add, _, _)
+ do_rounds(qK1, qK3, _ , qW1, _ , qW3, _ , _ , do_add, _, _)
+ do_rounds(qK2, _ , _ , qW2, _ , _ , _ , _ , _, _, _)
+ do_rounds(qK3, _ , _ , qW3, _ , _ , _ , _ , _, _, _)
+
+ CLEAR_REG(qW0)
+ CLEAR_REG(qW1)
+ CLEAR_REG(qW2)
+ CLEAR_REG(qW3)
+ CLEAR_REG(qK0)
+ CLEAR_REG(qK1)
+ CLEAR_REG(qK2)
+ CLEAR_REG(qK3)
+
+ vadd.u32 qH0123, qABCD0
+ vadd.u32 qH4567, qEFGH
+
+ CLEAR_REG(qABCD0)
+ CLEAR_REG(qABCD1)
+ CLEAR_REG(qEFGH)
+
+ vst1.32 {qH0123-qH4567}, [r0] /* store state */
+
+ CLEAR_REG(qH0123)
+ CLEAR_REG(qH4567)
+ vpop {q4-q7}
+
+.Ldo_nothing:
+ mov r0, #0
+ pop {r4,pc}
+.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce;
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/sha256-armv8-aarch64-ce.S b/comm/third_party/libgcrypt/cipher/sha256-armv8-aarch64-ce.S
new file mode 100644
index 0000000000..f57cae290b
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha256-armv8-aarch64-ce.S
@@ -0,0 +1,215 @@
+/* sha256-armv8-aarch64-ce.S - ARM/CE accelerated SHA-256 transform function
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__) && \
+ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) && defined(USE_SHA256)
+
+.cpu generic+simd+crypto
+
+.text
+
+
+/* Constants */
+
+.align 4
+gcry_sha256_aarch64_ce_K:
+.LK:
+ .long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+ .long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+ .long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+ .long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+ .long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+ .long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+ .long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+ .long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+ .long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+ .long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+ .long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+ .long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+ .long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+ .long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+ .long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+ .long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+
+
+/* Register macros */
+
+#define vH0123 v0
+#define vH4567 v1
+
+#define vABCD0 v2
+#define qABCD0 q2
+#define vABCD1 v3
+#define qABCD1 q3
+#define vEFGH v4
+#define qEFGH q4
+
+#define vT0 v5
+#define vT1 v6
+
+#define vW0 v16
+#define vW1 v17
+#define vW2 v18
+#define vW3 v19
+
+#define vK0 v20
+#define vK1 v21
+#define vK2 v22
+#define vK3 v23
+
+
+/* Round macros */
+
+#define _(...) /*_*/
+
+#define do_loadk(nk0, nk1) ld1 {nk0.16b-nk1.16b},[x3],#32;
+#define do_add(a, b) add a.4s, a.4s, b.4s;
+#define do_sha256su0(w0, w1) sha256su0 w0.4s, w1.4s;
+#define do_sha256su1(w0, w2, w3) sha256su1 w0.4s, w2.4s, w3.4s;
+
+#define do_rounds(k, nk0, nk1, w0, w1, w2, w3, loadk_fn, add_fn, su0_fn, su1_fn) \
+ loadk_fn( v##nk0, v##nk1 ); \
+ su0_fn( v##w0, v##w1 ); \
+ mov vABCD1.16b, vABCD0.16b; \
+ sha256h qABCD0, qEFGH, v##k.4s; \
+ sha256h2 qEFGH, qABCD1, v##k.4s; \
+ add_fn( v##nk0, v##w2 ); \
+ su1_fn( v##w0, v##w2, v##w3 );
+
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b;
+
+
+/*
+ * unsigned int
+ * _gcry_sha256_transform_armv8_ce (u32 state[8], const void *input_data,
+ * size_t num_blks)
+ */
+.align 3
+.globl _gcry_sha256_transform_armv8_ce
+ELF(.type _gcry_sha256_transform_armv8_ce,%function;)
+_gcry_sha256_transform_armv8_ce:
+ /* input:
+ * r0: ctx, CTX
+ * r1: data (64*nblks bytes)
+ * r2: nblks
+ */
+ CFI_STARTPROC();
+
+ cbz x2, .Ldo_nothing;
+
+ GET_DATA_POINTER(x3, .LK);
+ mov x4, x3
+
+ ld1 {vH0123.4s-vH4567.4s}, [x0] /* load state */
+
+ ld1 {vW0.16b-vW1.16b}, [x1], #32
+ do_loadk(vK0, vK1)
+ ld1 {vW2.16b-vW3.16b}, [x1], #32
+ mov vABCD0.16b, vH0123.16b
+ mov vEFGH.16b, vH4567.16b
+
+ rev32 vW0.16b, vW0.16b
+ rev32 vW1.16b, vW1.16b
+ rev32 vW2.16b, vW2.16b
+ do_add(vK0, vW0)
+ rev32 vW3.16b, vW3.16b
+ do_add(vK1, vW1)
+
+.Loop:
+ do_rounds(K0, K2, K3, W0, W1, W2, W3, do_loadk, do_add, do_sha256su0, do_sha256su1)
+ sub x2,x2,#1
+ do_rounds(K1, K3, _ , W1, W2, W3, W0, _ , do_add, do_sha256su0, do_sha256su1)
+ do_rounds(K2, K0, K1, W2, W3, W0, W1, do_loadk, do_add, do_sha256su0, do_sha256su1)
+ do_rounds(K3, K1, _ , W3, W0, W1, W2, _ , do_add, do_sha256su0, do_sha256su1)
+
+ do_rounds(K0, K2, K3, W0, W1, W2, W3, do_loadk, do_add, do_sha256su0, do_sha256su1)
+ do_rounds(K1, K3, _ , W1, W2, W3, W0, _ , do_add, do_sha256su0, do_sha256su1)
+ do_rounds(K2, K0, K1, W2, W3, W0, W1, do_loadk, do_add, do_sha256su0, do_sha256su1)
+ do_rounds(K3, K1, _ , W3, W0, W1, W2, _ , do_add, do_sha256su0, do_sha256su1)
+
+ do_rounds(K0, K2, K3, W0, W1, W2, W3, do_loadk, do_add, do_sha256su0, do_sha256su1)
+ do_rounds(K1, K3, _ , W1, W2, W3, W0, _ , do_add, do_sha256su0, do_sha256su1)
+ do_rounds(K2, K0, K1, W2, W3, W0, W1, do_loadk, do_add, do_sha256su0, do_sha256su1)
+ do_rounds(K3, K1, _ , W3, W0, W1, W2, _ , do_add, do_sha256su0, do_sha256su1)
+
+ cbz x2, .Lend
+
+ do_rounds(K0, K2, K3, W0, _ , W2, W3, do_loadk, do_add, _, _)
+ ld1 {vW0.16b}, [x1], #16
+ mov x3, x4
+ do_rounds(K1, K3, _ , W1, _ , W3, _ , _ , do_add, _, _)
+ ld1 {vW1.16b}, [x1], #16
+ rev32 vW0.16b, vW0.16b
+ do_rounds(K2, K0, K1, W2, _ , W0, _ , do_loadk, do_add, _, _)
+ rev32 vW1.16b, vW1.16b
+ ld1 {vW2.16b}, [x1], #16
+ do_rounds(K3, K1, _ , W3, _ , W1, _ , _ , do_add, _, _)
+ ld1 {vW3.16b}, [x1], #16
+
+ do_add(vH0123, vABCD0)
+ do_add(vH4567, vEFGH)
+
+ rev32 vW2.16b, vW2.16b
+ mov vABCD0.16b, vH0123.16b
+ rev32 vW3.16b, vW3.16b
+ mov vEFGH.16b, vH4567.16b
+
+ b .Loop
+
+.Lend:
+
+ do_rounds(K0, K2, K3, W0, _ , W2, W3, do_loadk, do_add, _, _)
+ do_rounds(K1, K3, _ , W1, _ , W3, _ , _ , do_add, _, _)
+ do_rounds(K2, _ , _ , W2, _ , _ , _ , _ , _, _, _)
+ do_rounds(K3, _ , _ , W3, _ , _ , _ , _ , _, _, _)
+
+ CLEAR_REG(vW0)
+ CLEAR_REG(vW1)
+ CLEAR_REG(vW2)
+ CLEAR_REG(vW3)
+ CLEAR_REG(vK0)
+ CLEAR_REG(vK1)
+ CLEAR_REG(vK2)
+ CLEAR_REG(vK3)
+
+ do_add(vH0123, vABCD0)
+ do_add(vH4567, vEFGH)
+
+ CLEAR_REG(vABCD0)
+ CLEAR_REG(vABCD1)
+ CLEAR_REG(vEFGH)
+
+ st1 {vH0123.4s-vH4567.4s}, [x0] /* store state */
+
+ CLEAR_REG(vH0123)
+ CLEAR_REG(vH4567)
+
+.Ldo_nothing:
+ mov x0, #0
+ ret
+ CFI_ENDPROC();
+ELF(.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce;)
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/sha256-avx-amd64.S b/comm/third_party/libgcrypt/cipher/sha256-avx-amd64.S
new file mode 100644
index 0000000000..ec945f8473
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha256-avx-amd64.S
@@ -0,0 +1,506 @@
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright (c) 2012, Intel Corporation
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the
+; distribution.
+;
+; * Neither the name of the Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived from
+; this software without specific prior written permission.
+;
+;
+; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; This code is described in an Intel White-Paper:
+; "Fast SHA-256 Implementations on Intel Architecture Processors"
+;
+; To find it, surf to http://www.intel.com/p/en_US/embedded
+; and search for that title.
+; The paper is expected to be released roughly at the end of April, 2012
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This code schedules 1 blocks at a time, with 4 lanes per block
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ * by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Note: Based on the SSSE3 implementation.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA256)
+
+#include "asm-common-amd64.h"
+
+.intel_syntax noprefix
+
+#define VMOVDQ vmovdqu /* assume buffers not aligned */
+
+#define ROR(p1, p2) \
+ /* shld is faster than ror on Intel Sandybridge */ \
+ shld p1, p1, (32 - p2);
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros*/
+
+/* addm [mem], reg
+ * Add reg to mem using reg-mem add and store */
+#define addm(p1, p2) \
+ add p2, p1; \
+ mov p1, p2;
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
+
+/* COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask
+ * Load xmm with mem and byte swap each dword */
+#define COPY_XMM_AND_BSWAP(p1, p2, p3) \
+ VMOVDQ p1, p2; \
+ vpshufb p1, p1, p3;
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
+
+#define X0 xmm4
+#define X1 xmm5
+#define X2 xmm6
+#define X3 xmm7
+
+#define XTMP0 xmm0
+#define XTMP1 xmm1
+#define XTMP2 xmm2
+#define XTMP3 xmm3
+#define XTMP4 xmm8
+#define XFER xmm9
+
+#define SHUF_00BA xmm10 /* shuffle xBxA -> 00BA */
+#define SHUF_DC00 xmm11 /* shuffle xDxC -> DC00 */
+#define BYTE_FLIP_MASK xmm12
+
+#define NUM_BLKS rdx /* 3rd arg */
+#define CTX rsi /* 2nd arg */
+#define INP rdi /* 1st arg */
+
+#define SRND rdi /* clobbers INP */
+#define c ecx
+#define d r8d
+#define e edx
+
+#define TBL rbp
+#define a eax
+#define b ebx
+
+#define f r9d
+#define g r10d
+#define h r11d
+
+#define y0 r13d
+#define y1 r14d
+#define y2 r15d
+
+
+
+#define _INP_END_SIZE 8
+#define _INP_SIZE 8
+#define _XFER_SIZE 8
+#define _XMM_SAVE_SIZE 0
+/* STACK_SIZE plus pushes must be an odd multiple of 8 */
+#define _ALIGN_SIZE 8
+
+#define _INP_END 0
+#define _INP (_INP_END + _INP_END_SIZE)
+#define _XFER (_INP + _INP_SIZE)
+#define _XMM_SAVE (_XFER + _XFER_SIZE + _ALIGN_SIZE)
+#define STACK_SIZE (_XMM_SAVE + _XMM_SAVE_SIZE)
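+
+/* Editorial note (not part of libgcrypt): with the sizes above, STACK_SIZE
+ * (32 bytes) plus the five register pushes in the prologue (40 bytes) is 72,
+ * an odd multiple of 8; adding the 8-byte return address gives a multiple of
+ * 16, so rsp is 16-byte aligned after "sub rsp, STACK_SIZE" and the aligned
+ * vmovdqa stores to [rsp + _XFER] work as intended. */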
+
+
+#define FOUR_ROUNDS_AND_SCHED_0(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+ /* compute s0 four at a time and s1 two at a time */; \
+ /* compute W[-16] + W[-7] 4 at a time */; \
+ mov y0, e /* y0 = e */; \
+ ROR( y0, (25-11)) /* y0 = e >> (25-11) */; \
+ mov y1, a /* y1 = a */; \
+ vpalignr XTMP0, X3, X2, 4 /* XTMP0 = W[-7] */; \
+ ROR( y1, (22-13)) /* y1 = a >> (22-13) */; \
+ xor y0, e /* y0 = e ^ (e >> (25-11)) */; \
+ mov y2, f /* y2 = f */; \
+ ROR( y0, (11-6)) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+ xor y1, a /* y1 = a ^ (a >> (22-13) */; \
+ xor y2, g /* y2 = f^g */; \
+ vpaddd XTMP0, XTMP0, X0 /* XTMP0 = W[-7] + W[-16] */; \
+ xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+ and y2, e /* y2 = (f^g)&e */; \
+ ROR( y1, (13-2)) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+ /* compute s0 */; \
+ vpalignr XTMP1, X1, X0, 4 /* XTMP1 = W[-15] */; \
+ xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+ ROR( y0, 6) /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+ xor y2, g /* y2 = CH = ((f^g)&e)^g */; \
+ ROR( y1, 2) /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+ add y2, y0 /* y2 = S1 + CH */; \
+ add y2, [rsp + _XFER + 0*4] /* y2 = k + w + S1 + CH */; \
+ mov y0, a /* y0 = a */; \
+ add h, y2 /* h = h + S1 + CH + k + w */; \
+ mov y2, a /* y2 = a */; \
+ vpslld XTMP2, XTMP1, (32-7); \
+ or y0, c /* y0 = a|c */; \
+ add d, h /* d = d + h + S1 + CH + k + w */; \
+ and y2, c /* y2 = a&c */; \
+ vpsrld XTMP3, XTMP1, 7; \
+ and y0, b /* y0 = (a|c)&b */; \
+ add h, y1 /* h = h + S1 + CH + k + w + S0 */; \
+ vpor XTMP3, XTMP3, XTMP2 /* XTMP1 = W[-15] ror 7 */; \
+ or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+ lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED_1(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+ mov y0, e /* y0 = e */; \
+ mov y1, a /* y1 = a */; \
+ ROR( y0, (25-11)) /* y0 = e >> (25-11) */; \
+ xor y0, e /* y0 = e ^ (e >> (25-11)) */; \
+ mov y2, f /* y2 = f */; \
+ ROR( y1, (22-13)) /* y1 = a >> (22-13) */; \
+ vpslld XTMP2, XTMP1, (32-18); \
+ xor y1, a /* y1 = a ^ (a >> (22-13) */; \
+ ROR( y0, (11-6)) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+ xor y2, g /* y2 = f^g */; \
+ vpsrld XTMP4, XTMP1, 18; \
+ ROR( y1, (13-2)) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+ xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+ and y2, e /* y2 = (f^g)&e */; \
+ ROR( y0, 6) /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+ vpxor XTMP4, XTMP4, XTMP3; \
+ xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+ xor y2, g /* y2 = CH = ((f^g)&e)^g */; \
+ vpsrld XTMP1, XTMP1, 3 /* XTMP4 = W[-15] >> 3 */; \
+ add y2, y0 /* y2 = S1 + CH */; \
+ add y2, [rsp + _XFER + 1*4] /* y2 = k + w + S1 + CH */; \
+ ROR( y1, 2) /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+ vpxor XTMP1, XTMP1, XTMP2 /* XTMP1 = W[-15] ror 7 ^ W[-15] ror 18 */; \
+ mov y0, a /* y0 = a */; \
+ add h, y2 /* h = h + S1 + CH + k + w */; \
+ mov y2, a /* y2 = a */; \
+ vpxor XTMP1, XTMP1, XTMP4 /* XTMP1 = s0 */; \
+ or y0, c /* y0 = a|c */; \
+ add d, h /* d = d + h + S1 + CH + k + w */; \
+ and y2, c /* y2 = a&c */; \
+ /* compute low s1 */; \
+ vpshufd XTMP2, X3, 0b11111010 /* XTMP2 = W[-2] {BBAA} */; \
+ and y0, b /* y0 = (a|c)&b */; \
+ add h, y1 /* h = h + S1 + CH + k + w + S0 */; \
+ vpaddd XTMP0, XTMP0, XTMP1 /* XTMP0 = W[-16] + W[-7] + s0 */; \
+ or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+ lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED_2(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+ mov y0, e /* y0 = e */; \
+ mov y1, a /* y1 = a */; \
+ ROR( y0, (25-11)) /* y0 = e >> (25-11) */; \
+ xor y0, e /* y0 = e ^ (e >> (25-11)) */; \
+ ROR( y1, (22-13)) /* y1 = a >> (22-13) */; \
+ mov y2, f /* y2 = f */; \
+ xor y1, a /* y1 = a ^ (a >> (22-13) */; \
+ ROR( y0, (11-6)) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+ vpsrlq XTMP3, XTMP2, 17 /* XTMP2 = W[-2] ror 17 {xBxA} */; \
+ xor y2, g /* y2 = f^g */; \
+ vpsrlq XTMP4, XTMP2, 19 /* XTMP3 = W[-2] ror 19 {xBxA} */; \
+ xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+ and y2, e /* y2 = (f^g)&e */; \
+ vpsrld XTMP2, XTMP2, 10 /* XTMP4 = W[-2] >> 10 {BBAA} */; \
+ ROR( y1, (13-2)) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+ xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+ xor y2, g /* y2 = CH = ((f^g)&e)^g */; \
+ ROR( y0, 6) /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+ vpxor XTMP2, XTMP2, XTMP3; \
+ add y2, y0 /* y2 = S1 + CH */; \
+ ROR( y1, 2) /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+ add y2, [rsp + _XFER + 2*4] /* y2 = k + w + S1 + CH */; \
+ vpxor XTMP4, XTMP4, XTMP2 /* XTMP4 = s1 {xBxA} */; \
+ mov y0, a /* y0 = a */; \
+ add h, y2 /* h = h + S1 + CH + k + w */; \
+ mov y2, a /* y2 = a */; \
+ vpshufb XTMP4, XTMP4, SHUF_00BA /* XTMP4 = s1 {00BA} */; \
+ or y0, c /* y0 = a|c */; \
+ add d, h /* d = d + h + S1 + CH + k + w */; \
+ and y2, c /* y2 = a&c */; \
+ vpaddd XTMP0, XTMP0, XTMP4 /* XTMP0 = {..., ..., W[1], W[0]} */; \
+ and y0, b /* y0 = (a|c)&b */; \
+ add h, y1 /* h = h + S1 + CH + k + w + S0 */; \
+ /* compute high s1 */; \
+ vpshufd XTMP2, XTMP0, 0b01010000 /* XTMP2 = W[-2] {DDCC} */; \
+ or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+ lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED_3(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+ mov y0, e /* y0 = e */; \
+ ROR( y0, (25-11)) /* y0 = e >> (25-11) */; \
+ mov y1, a /* y1 = a */; \
+ ROR( y1, (22-13)) /* y1 = a >> (22-13) */; \
+ xor y0, e /* y0 = e ^ (e >> (25-11)) */; \
+ mov y2, f /* y2 = f */; \
+ ROR( y0, (11-6)) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+ vpsrlq XTMP3, XTMP2, 17 /* XTMP2 = W[-2] ror 17 {xDxC} */; \
+ xor y1, a /* y1 = a ^ (a >> (22-13) */; \
+ xor y2, g /* y2 = f^g */; \
+ vpsrlq X0, XTMP2, 19 /* XTMP3 = W[-2] ror 19 {xDxC} */; \
+ xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+ and y2, e /* y2 = (f^g)&e */; \
+ ROR( y1, (13-2)) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+ vpsrld XTMP2, XTMP2, 10 /* X0 = W[-2] >> 10 {DDCC} */; \
+ xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+ ROR( y0, 6) /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+ xor y2, g /* y2 = CH = ((f^g)&e)^g */; \
+ vpxor XTMP2, XTMP2, XTMP3; \
+ ROR( y1, 2) /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+ add y2, y0 /* y2 = S1 + CH */; \
+ add y2, [rsp + _XFER + 3*4] /* y2 = k + w + S1 + CH */; \
+ vpxor X0, X0, XTMP2 /* X0 = s1 {xDxC} */; \
+ mov y0, a /* y0 = a */; \
+ add h, y2 /* h = h + S1 + CH + k + w */; \
+ mov y2, a /* y2 = a */; \
+ vpshufb X0, X0, SHUF_DC00 /* X0 = s1 {DC00} */; \
+ or y0, c /* y0 = a|c */; \
+ add d, h /* d = d + h + S1 + CH + k + w */; \
+ and y2, c /* y2 = a&c */; \
+ vpaddd X0, X0, XTMP0 /* X0 = {W[3], W[2], W[1], W[0]} */; \
+ and y0, b /* y0 = (a|c)&b */; \
+ add h, y1 /* h = h + S1 + CH + k + w + S0 */; \
+ or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+ lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+ FOUR_ROUNDS_AND_SCHED_0(X0, X1, X2, X3, a, b, c, d, e, f, g, h); \
+ FOUR_ROUNDS_AND_SCHED_1(X0, X1, X2, X3, h, a, b, c, d, e, f, g); \
+ FOUR_ROUNDS_AND_SCHED_2(X0, X1, X2, X3, g, h, a, b, c, d, e, f); \
+ FOUR_ROUNDS_AND_SCHED_3(X0, X1, X2, X3, f, g, h, a, b, c, d, e);
+
+/* input is [rsp + _XFER + %1 * 4] */
+#define DO_ROUND(i1, a, b, c, d, e, f, g, h) \
+ mov y0, e /* y0 = e */; \
+ ROR( y0, (25-11)) /* y0 = e >> (25-11) */; \
+ mov y1, a /* y1 = a */; \
+ xor y0, e /* y0 = e ^ (e >> (25-11)) */; \
+ ROR( y1, (22-13)) /* y1 = a >> (22-13) */; \
+ mov y2, f /* y2 = f */; \
+ xor y1, a /* y1 = a ^ (a >> (22-13) */; \
+ ROR( y0, (11-6)) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+ xor y2, g /* y2 = f^g */; \
+ xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+ ROR( y1, (13-2)) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+ and y2, e /* y2 = (f^g)&e */; \
+ xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+ ROR( y0, 6) /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+ xor y2, g /* y2 = CH = ((f^g)&e)^g */; \
+ add y2, y0 /* y2 = S1 + CH */; \
+ ROR( y1, 2) /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+ add y2, [rsp + _XFER + i1 * 4] /* y2 = k + w + S1 + CH */; \
+ mov y0, a /* y0 = a */; \
+ add h, y2 /* h = h + S1 + CH + k + w */; \
+ mov y2, a /* y2 = a */; \
+ or y0, c /* y0 = a|c */; \
+ add d, h /* d = d + h + S1 + CH + k + w */; \
+ and y2, c /* y2 = a&c */; \
+ and y0, b /* y0 = (a|c)&b */; \
+ add h, y1 /* h = h + S1 + CH + k + w + S0 */; \
+ or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+ lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; void sha256_avx(void *input_data, UINT32 digest[8], UINT64 num_blks)
+;; arg 1 : pointer to input data
+;; arg 2 : pointer to digest
+;; arg 3 : Num blocks
+*/
+.text
+.globl _gcry_sha256_transform_amd64_avx
+ELF(.type _gcry_sha256_transform_amd64_avx,@function;)
+.align 16
+_gcry_sha256_transform_amd64_avx:
+ CFI_STARTPROC()
+ vzeroupper
+
+ push rbx
+ CFI_PUSH(rbx)
+ push rbp
+ CFI_PUSH(rbp)
+ push r13
+ CFI_PUSH(r13)
+ push r14
+ CFI_PUSH(r14)
+ push r15
+ CFI_PUSH(r15)
+
+ sub rsp, STACK_SIZE
+ CFI_ADJUST_CFA_OFFSET(STACK_SIZE);
+
+ shl NUM_BLKS, 6 /* convert to bytes */
+ jz .Ldone_hash
+ add NUM_BLKS, INP /* pointer to end of data */
+ mov [rsp + _INP_END], NUM_BLKS
+
+ /* load initial digest */
+ mov a,[4*0 + CTX]
+ mov b,[4*1 + CTX]
+ mov c,[4*2 + CTX]
+ mov d,[4*3 + CTX]
+ mov e,[4*4 + CTX]
+ mov f,[4*5 + CTX]
+ mov g,[4*6 + CTX]
+ mov h,[4*7 + CTX]
+
+ vmovdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP]
+ vmovdqa SHUF_00BA, [.L_SHUF_00BA ADD_RIP]
+ vmovdqa SHUF_DC00, [.L_SHUF_DC00 ADD_RIP]
+
+.Loop0:
+ lea TBL, [.LK256 ADD_RIP]
+
+ /* byte swap first 16 dwords */
+ COPY_XMM_AND_BSWAP(X0, [INP + 0*16], BYTE_FLIP_MASK)
+ COPY_XMM_AND_BSWAP(X1, [INP + 1*16], BYTE_FLIP_MASK)
+ COPY_XMM_AND_BSWAP(X2, [INP + 2*16], BYTE_FLIP_MASK)
+ COPY_XMM_AND_BSWAP(X3, [INP + 3*16], BYTE_FLIP_MASK)
+
+ mov [rsp + _INP], INP
+
+ /* schedule 48 input dwords, by doing 3 rounds of 16 each */
+ mov SRND, 3
+.align 16
+.Loop1:
+ vpaddd XFER, X0, [TBL + 0*16]
+ vmovdqa [rsp + _XFER], XFER
+ FOUR_ROUNDS_AND_SCHED(X0, X1, X2, X3, a, b, c, d, e, f, g, h)
+
+ vpaddd XFER, X1, [TBL + 1*16]
+ vmovdqa [rsp + _XFER], XFER
+ FOUR_ROUNDS_AND_SCHED(X1, X2, X3, X0, e, f, g, h, a, b, c, d)
+
+ vpaddd XFER, X2, [TBL + 2*16]
+ vmovdqa [rsp + _XFER], XFER
+ FOUR_ROUNDS_AND_SCHED(X2, X3, X0, X1, a, b, c, d, e, f, g, h)
+
+ vpaddd XFER, X3, [TBL + 3*16]
+ vmovdqa [rsp + _XFER], XFER
+ add TBL, 4*16
+ FOUR_ROUNDS_AND_SCHED(X3, X0, X1, X2, e, f, g, h, a, b, c, d)
+
+ sub SRND, 1
+ jne .Loop1
+
+ mov SRND, 2
+.Loop2:
+ vpaddd X0, X0, [TBL + 0*16]
+ vmovdqa [rsp + _XFER], X0
+ DO_ROUND(0, a, b, c, d, e, f, g, h)
+ DO_ROUND(1, h, a, b, c, d, e, f, g)
+ DO_ROUND(2, g, h, a, b, c, d, e, f)
+ DO_ROUND(3, f, g, h, a, b, c, d, e)
+ vpaddd X1, X1, [TBL + 1*16]
+ vmovdqa [rsp + _XFER], X1
+ add TBL, 2*16
+ DO_ROUND(0, e, f, g, h, a, b, c, d)
+ DO_ROUND(1, d, e, f, g, h, a, b, c)
+ DO_ROUND(2, c, d, e, f, g, h, a, b)
+ DO_ROUND(3, b, c, d, e, f, g, h, a)
+
+ vmovdqa X0, X2
+ vmovdqa X1, X3
+
+ sub SRND, 1
+ jne .Loop2
+
+ addm([4*0 + CTX],a)
+ addm([4*1 + CTX],b)
+ addm([4*2 + CTX],c)
+ addm([4*3 + CTX],d)
+ addm([4*4 + CTX],e)
+ addm([4*5 + CTX],f)
+ addm([4*6 + CTX],g)
+ addm([4*7 + CTX],h)
+
+ mov INP, [rsp + _INP]
+ add INP, 64
+ cmp INP, [rsp + _INP_END]
+ jne .Loop0
+
+.Ldone_hash:
+ vzeroall
+
+ vmovdqa [rsp + _XFER], XFER
+ xor eax, eax
+
+ add rsp, STACK_SIZE
+ CFI_ADJUST_CFA_OFFSET(-STACK_SIZE);
+
+ pop r15
+ CFI_POP(r15)
+ pop r14
+ CFI_POP(r14)
+ pop r13
+ CFI_POP(r13)
+ pop rbp
+ CFI_POP(rbp)
+ pop rbx
+ CFI_POP(rbx)
+
+ ret
+ CFI_ENDPROC()
+
+
+.align 16
+.LK256:
+ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+.LPSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203
+
+/* shuffle xBxA -> 00BA */
+.L_SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100
+
+/* shuffle xDxC -> DC00 */
+.L_SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
+
+#endif
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/sha256-avx2-bmi2-amd64.S b/comm/third_party/libgcrypt/cipher/sha256-avx2-bmi2-amd64.S
new file mode 100644
index 0000000000..d130dd4a61
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha256-avx2-bmi2-amd64.S
@@ -0,0 +1,527 @@
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright (c) 2012, Intel Corporation
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the
+; distribution.
+;
+; * Neither the name of the Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived from
+; this software without specific prior written permission.
+;
+;
+; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; This code is described in an Intel White-Paper:
+; "Fast SHA-256 Implementations on Intel Architecture Processors"
+;
+; To find it, surf to http://www.intel.com/p/en_US/embedded
+; and search for that title.
+; The paper is expected to be released roughly at the end of April, 2012
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This code schedules 2 blocks at a time, with 4 lanes per block
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ * by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+ defined(USE_SHA256)
+
+#include "asm-common-amd64.h"
+
+.intel_syntax noprefix
+
+#define VMOVDQ vmovdqu /* ; assume buffers not aligned */
+
+/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros */
+
+/* addm [mem], reg */
+/* Add reg to mem using reg-mem add and store */
+#define addm(p1, p2) \
+ add p2, p1; \
+ mov p1, p2;
+
+/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */
+
+#define X0 ymm4
+#define X1 ymm5
+#define X2 ymm6
+#define X3 ymm7
+
+/* XMM versions of above */
+#define XWORD0 xmm4
+#define XWORD1 xmm5
+#define XWORD2 xmm6
+#define XWORD3 xmm7
+
+#define XTMP0 ymm0
+#define XTMP1 ymm1
+#define XTMP2 ymm2
+#define XTMP3 ymm3
+#define XTMP4 ymm8
+#define XFER ymm9
+#define XTMP5 ymm11
+
+#define SHUF_00BA ymm10 /* shuffle xBxA -> 00BA */
+#define SHUF_DC00 ymm12 /* shuffle xDxC -> DC00 */
+#define BYTE_FLIP_MASK ymm13
+
+#define X_BYTE_FLIP_MASK xmm13 /* XMM version of BYTE_FLIP_MASK */
+
+#define NUM_BLKS rdx /* 3rd arg */
+#define CTX rsi /* 2nd arg */
+#define INP rdi /* 1st arg */
+#define c ecx
+#define d r8d
+#define e edx /* clobbers NUM_BLKS */
+#define y3 edi /* clobbers INP */
+
+#define TBL rbp
+#define SRND CTX /* SRND is same register as CTX */
+
+#define a eax
+#define b ebx
+#define f r9d
+#define g r10d
+#define h r11d
+#define old_h r11d
+
+#define T1 r12d
+#define y0 r13d
+#define y1 r14d
+#define y2 r15d
+
+
+#define _XFER_SIZE 2*64*4 /* 2 blocks, 64 rounds, 4 bytes/round */
+#define _XMM_SAVE_SIZE 0
+#define _INP_END_SIZE 8
+#define _INP_SIZE 8
+#define _CTX_SIZE 8
+#define _RSP_SIZE 8
+
+#define _XFER 0
+#define _XMM_SAVE _XFER + _XFER_SIZE
+#define _INP_END _XMM_SAVE + _XMM_SAVE_SIZE
+#define _INP _INP_END + _INP_END_SIZE
+#define _CTX _INP + _INP_SIZE
+#define _RSP _CTX + _CTX_SIZE
+#define STACK_SIZE _RSP + _RSP_SIZE
+
+#define ONE_ROUND_PART1(XFERIN, a, b, c, d, e, f, g, h) \
+ /* h += Sum1 (e) + Ch (e, f, g) + (k[t] + w[0]); */ \
+ /* d += h; */ \
+ /* h += Sum0 (a) + Maj (a, b, c); */ \
+ \
+ /* Ch(x, y, z) => ((x & y) + (~x & z)) */ \
+ /* Maj(x, y, z) => ((x & y) + (z & (x ^ y))) */ \
+ \
+ mov y3, e; \
+ add h, [XFERIN]; \
+ and y3, f; \
+ rorx y0, e, 25; \
+ rorx y1, e, 11; \
+ lea h, [h + y3]; \
+ andn y3, e, g; \
+ rorx T1, a, 13; \
+ xor y0, y1; \
+ lea h, [h + y3]
+
+#define ONE_ROUND_PART2(a, b, c, d, e, f, g, h) \
+ rorx y2, a, 22; \
+ rorx y1, e, 6; \
+ mov y3, a; \
+ xor T1, y2; \
+ xor y0, y1; \
+ xor y3, b; \
+ lea h, [h + y0]; \
+ mov y0, a; \
+ rorx y2, a, 2; \
+ add d, h; \
+ and y3, c; \
+ xor T1, y2; \
+ lea h, [h + y3]; \
+ lea h, [h + T1]; \
+ and y0, b; \
+ lea h, [h + y0]
+
+#define ONE_ROUND(XFER, a, b, c, d, e, f, g, h) \
+ ONE_ROUND_PART1(XFER, a, b, c, d, e, f, g, h); \
+ ONE_ROUND_PART2(a, b, c, d, e, f, g, h)
+
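+/* Editorial illustration (not part of libgcrypt): ONE_ROUND_PART1/2 above
+ * interleave one scalar SHA-256 round with the vector message schedule.
+ * Written as plain C, the round they implement (per the comments in
+ * ONE_ROUND_PART1, with k[t]+w[t] preloaded into the _XFER slot) is:
+ *
+ *   #define ROR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
+ *
+ *   u32 sum1 = ROR32(e, 6) ^ ROR32(e, 11) ^ ROR32(e, 25);
+ *   u32 sum0 = ROR32(a, 2) ^ ROR32(a, 13) ^ ROR32(a, 22);
+ *   u32 ch   = (e & f) + (~e & g);       // andn computes ~e & g
+ *   u32 maj  = (a & b) + (c & (a ^ b));
+ *
+ *   h += sum1 + ch + k_plus_w;
+ *   d += h;
+ *   h += sum0 + maj;
+ */
+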
+#define FOUR_ROUNDS_AND_SCHED(XFERIN, XFEROUT, X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+ /* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 0 ;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+ vpalignr XTMP0, X3, X2, 4 /* XTMP0 = W[-7] */; \
+ vpaddd XTMP0, XTMP0, X0 /* XTMP0 = W[-7] + W[-16]; y1 = (e >> 6); S1 */; \
+ vpalignr XTMP1, X1, X0, 4 /* XTMP1 = W[-15] */; \
+ vpsrld XTMP2, XTMP1, 7; \
+ vpslld XTMP3, XTMP1, (32-7); \
+ vpor XTMP3, XTMP3, XTMP2 /* XTMP3 = W[-15] ror 7 */; \
+ vpsrld XTMP2, XTMP1,18; \
+ \
+ ONE_ROUND(0*4+XFERIN, a, b, c, d, e, f, g, h); \
+ \
+ /* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+ vpsrld XTMP4, XTMP1, 3 /* XTMP4 = W[-15] >> 3 */; \
+ vpslld XTMP1, XTMP1, (32-18); \
+ vpxor XTMP3, XTMP3, XTMP1; \
+ vpxor XTMP3, XTMP3, XTMP2 /* XTMP3 = W[-15] ror 7 ^ W[-15] ror 18 */; \
+ vpxor XTMP1, XTMP3, XTMP4 /* XTMP1 = s0 */; \
+ vpshufd XTMP2, X3, 0b11111010 /* XTMP2 = W[-2] {BBAA} */; \
+ vpaddd XTMP0, XTMP0, XTMP1 /* XTMP0 = W[-16] + W[-7] + s0 */; \
+ vpsrld XTMP4, XTMP2, 10 /* XTMP4 = W[-2] >> 10 {BBAA} */; \
+ \
+ ONE_ROUND(1*4+XFERIN, h, a, b, c, d, e, f, g); \
+ \
+ /* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+ vpsrlq XTMP3, XTMP2, 19 /* XTMP3 = W[-2] ror 19 {xBxA} */; \
+ vpsrlq XTMP2, XTMP2, 17 /* XTMP2 = W[-2] ror 17 {xBxA} */; \
+ vpxor XTMP2, XTMP2, XTMP3; \
+ vpxor XTMP4, XTMP4, XTMP2 /* XTMP4 = s1 {xBxA} */; \
+ vpshufb XTMP4, XTMP4, SHUF_00BA /* XTMP4 = s1 {00BA} */; \
+ vpaddd XTMP0, XTMP0, XTMP4 /* XTMP0 = {..., ..., W[1], W[0]} */; \
+ vpshufd XTMP2, XTMP0, 0b1010000 /* XTMP2 = W[-2] {DDCC} */; \
+ \
+ ONE_ROUND(2*4+XFERIN, g, h, a, b, c, d, e, f); \
+ \
+ /* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+ vpsrld XTMP5, XTMP2, 10 /* XTMP5 = W[-2] >> 10 {DDCC} */; \
+ vpsrlq XTMP3, XTMP2, 19 /* XTMP3 = W[-2] ror 19 {xDxC} */; \
+ vpsrlq XTMP2, XTMP2, 17 /* XTMP2 = W[-2] ror 17 {xDxC} */; \
+ vpxor XTMP2, XTMP2, XTMP3; \
+ vpxor XTMP5, XTMP5, XTMP2 /* XTMP5 = s1 {xDxC} */; \
+ vpshufb XTMP5, XTMP5, SHUF_DC00 /* XTMP5 = s1 {DC00} */; \
+ vpaddd X0, XTMP5, XTMP0 /* X0 = {W[3], W[2], W[1], W[0]} */; \
+ vpaddd XFER, X0, [TBL + XFEROUT]; \
+ \
+ ONE_ROUND_PART1(3*4+XFERIN, f, g, h, a, b, c, d, e); \
+ vmovdqa [rsp + _XFER + XFEROUT], XFER; \
+ ONE_ROUND_PART2(f, g, h, a, b, c, d, e);
+
+#define DO_4ROUNDS(XFERIN, a, b, c, d, e, f, g, h) \
+ ONE_ROUND(0*4+XFERIN, a, b, c, d, e, f, g, h); \
+ ONE_ROUND(1*4+XFERIN, h, a, b, c, d, e, f, g); \
+ ONE_ROUND(2*4+XFERIN, g, h, a, b, c, d, e, f); \
+ ONE_ROUND(3*4+XFERIN, f, g, h, a, b, c, d, e)
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; void sha256_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks)
+;; arg 1 : pointer to input data
+;; arg 2 : pointer to digest
+;; arg 3 : Num blocks
+*/
+.text
+.globl _gcry_sha256_transform_amd64_avx2
+ELF(.type _gcry_sha256_transform_amd64_avx2,@function)
+.align 32
+_gcry_sha256_transform_amd64_avx2:
+ CFI_STARTPROC()
+ xor eax, eax
+
+ cmp rdx, 0
+ je .Lnowork
+
+ push rbx
+ CFI_PUSH(rbx)
+ push rbp
+ CFI_PUSH(rbp)
+ push r12
+ CFI_PUSH(r12)
+ push r13
+ CFI_PUSH(r13)
+ push r14
+ CFI_PUSH(r14)
+ push r15
+ CFI_PUSH(r15)
+
+ vzeroupper
+
+ vmovdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP]
+ vmovdqa SHUF_00BA, [.L_SHUF_00BA ADD_RIP]
+ vmovdqa SHUF_DC00, [.L_SHUF_DC00 ADD_RIP]
+
+ mov rax, rsp
+ CFI_DEF_CFA_REGISTER(rax);
+ sub rsp, STACK_SIZE
+ and rsp, ~63
+ mov [rsp + _RSP], rax
+ CFI_CFA_ON_STACK(_RSP, 6 * 8)
+
+ shl NUM_BLKS, 6 /* convert to bytes */
+ lea NUM_BLKS, [NUM_BLKS + INP - 64] /* pointer to last block */
+ mov [rsp + _INP_END], NUM_BLKS
+
+ /* Check if only one block of input. Note: Loading initial digest
+ * only uses 'mov' instruction and does not change condition
+ * flags. */
+ cmp NUM_BLKS, INP
+
+ /* ; load initial digest */
+ mov a,[4*0 + CTX]
+ mov b,[4*1 + CTX]
+ mov c,[4*2 + CTX]
+ mov d,[4*3 + CTX]
+ mov e,[4*4 + CTX]
+ mov f,[4*5 + CTX]
+ mov g,[4*6 + CTX]
+ mov h,[4*7 + CTX]
+
+ mov [rsp + _CTX], CTX
+
+ je .Ldo_last_block
+
+.Loop0:
+ lea TBL, [.LK256 ADD_RIP]
+
+ /* ; Load first 16 dwords from two blocks */
+ VMOVDQ XTMP0, [INP + 0*32]
+ VMOVDQ XTMP1, [INP + 1*32]
+ VMOVDQ XTMP2, [INP + 2*32]
+ VMOVDQ XTMP3, [INP + 3*32]
+
+ /* ; byte swap data */
+ vpshufb XTMP0, XTMP0, BYTE_FLIP_MASK
+ vpshufb XTMP1, XTMP1, BYTE_FLIP_MASK
+ vpshufb XTMP2, XTMP2, BYTE_FLIP_MASK
+ vpshufb XTMP3, XTMP3, BYTE_FLIP_MASK
+
+ /* ; transpose data into high/low halves */
+ vperm2i128 X0, XTMP0, XTMP2, 0x20
+ vperm2i128 X1, XTMP0, XTMP2, 0x31
+ vperm2i128 X2, XTMP1, XTMP3, 0x20
+ vperm2i128 X3, XTMP1, XTMP3, 0x31
+
+.Last_block_enter:
+ add INP, 64
+ mov [rsp + _INP], INP
+
+ /* ; schedule 48 input dwords, by doing 3 rounds of 12 each */
+ xor SRND, SRND
+
+ vpaddd XFER, X0, [TBL + 0*32]
+ vmovdqa [rsp + _XFER + 0*32], XFER
+ vpaddd XFER, X1, [TBL + 1*32]
+ vmovdqa [rsp + _XFER + 1*32], XFER
+ vpaddd XFER, X2, [TBL + 2*32]
+ vmovdqa [rsp + _XFER + 2*32], XFER
+ vpaddd XFER, X3, [TBL + 3*32]
+ vmovdqa [rsp + _XFER + 3*32], XFER
+
+.align 16
+.Loop1:
+ FOUR_ROUNDS_AND_SCHED(rsp + _XFER + SRND + 0*32, SRND + 4*32, X0, X1, X2, X3, a, b, c, d, e, f, g, h)
+ FOUR_ROUNDS_AND_SCHED(rsp + _XFER + SRND + 1*32, SRND + 5*32, X1, X2, X3, X0, e, f, g, h, a, b, c, d)
+ FOUR_ROUNDS_AND_SCHED(rsp + _XFER + SRND + 2*32, SRND + 6*32, X2, X3, X0, X1, a, b, c, d, e, f, g, h)
+ FOUR_ROUNDS_AND_SCHED(rsp + _XFER + SRND + 3*32, SRND + 7*32, X3, X0, X1, X2, e, f, g, h, a, b, c, d)
+
+ add SRND, 4*32
+ cmp SRND, 3 * 4*32
+ jb .Loop1
+
+ /* ; Do last 16 rounds with no scheduling */
+ DO_4ROUNDS(rsp + _XFER + (3*4*32 + 0*32), a, b, c, d, e, f, g, h)
+ DO_4ROUNDS(rsp + _XFER + (3*4*32 + 1*32), e, f, g, h, a, b, c, d)
+ DO_4ROUNDS(rsp + _XFER + (3*4*32 + 2*32), a, b, c, d, e, f, g, h)
+ DO_4ROUNDS(rsp + _XFER + (3*4*32 + 3*32), e, f, g, h, a, b, c, d)
+
+ mov CTX, [rsp + _CTX]
+ mov INP, [rsp + _INP]
+
+ addm([4*0 + CTX],a)
+ addm([4*1 + CTX],b)
+ addm([4*2 + CTX],c)
+ addm([4*3 + CTX],d)
+ addm([4*4 + CTX],e)
+ addm([4*5 + CTX],f)
+ addm([4*6 + CTX],g)
+ addm([4*7 + CTX],h)
+
+ cmp INP, [rsp + _INP_END]
+ ja .Ldone_hash
+
+ /* ;;; Do second block using previously scheduled results */
+ xor SRND, SRND
+.align 16
+.Loop3:
+ DO_4ROUNDS(rsp + _XFER + SRND + 0*32 + 16, a, b, c, d, e, f, g, h)
+ DO_4ROUNDS(rsp + _XFER + SRND + 1*32 + 16, e, f, g, h, a, b, c, d)
+ add SRND, 2*32
+ cmp SRND, 4 * 4*32
+ jb .Loop3
+
+ mov CTX, [rsp + _CTX]
+ mov INP, [rsp + _INP]
+ add INP, 64
+
+ addm([4*0 + CTX],a)
+ addm([4*1 + CTX],b)
+ addm([4*2 + CTX],c)
+ addm([4*3 + CTX],d)
+ addm([4*4 + CTX],e)
+ addm([4*5 + CTX],f)
+ addm([4*6 + CTX],g)
+ addm([4*7 + CTX],h)
+
+ cmp INP, [rsp + _INP_END]
+ jb .Loop0
+ ja .Ldone_hash
+
+.Ldo_last_block:
+ /* ;;; do last block */
+ lea TBL, [.LK256 ADD_RIP]
+
+ VMOVDQ XWORD0, [INP + 0*16]
+ VMOVDQ XWORD1, [INP + 1*16]
+ VMOVDQ XWORD2, [INP + 2*16]
+ VMOVDQ XWORD3, [INP + 3*16]
+
+ vpshufb XWORD0, XWORD0, X_BYTE_FLIP_MASK
+ vpshufb XWORD1, XWORD1, X_BYTE_FLIP_MASK
+ vpshufb XWORD2, XWORD2, X_BYTE_FLIP_MASK
+ vpshufb XWORD3, XWORD3, X_BYTE_FLIP_MASK
+
+ jmp .Last_block_enter
+
+.Lonly_one_block:
+
+ /* ; load initial digest */
+ mov a,[4*0 + CTX]
+ mov b,[4*1 + CTX]
+ mov c,[4*2 + CTX]
+ mov d,[4*3 + CTX]
+ mov e,[4*4 + CTX]
+ mov f,[4*5 + CTX]
+ mov g,[4*6 + CTX]
+ mov h,[4*7 + CTX]
+
+ vmovdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP]
+ vmovdqa SHUF_00BA, [.L_SHUF_00BA ADD_RIP]
+ vmovdqa SHUF_DC00, [.L_SHUF_DC00 ADD_RIP]
+
+ mov [rsp + _CTX], CTX
+ jmp .Ldo_last_block
+
+.Ldone_hash:
+ vzeroall
+
+ /* burn stack */
+ vmovdqa [rsp + _XFER + 0 * 32], ymm0
+ vmovdqa [rsp + _XFER + 1 * 32], ymm0
+ vmovdqa [rsp + _XFER + 2 * 32], ymm0
+ vmovdqa [rsp + _XFER + 3 * 32], ymm0
+ vmovdqa [rsp + _XFER + 4 * 32], ymm0
+ vmovdqa [rsp + _XFER + 5 * 32], ymm0
+ vmovdqa [rsp + _XFER + 6 * 32], ymm0
+ vmovdqa [rsp + _XFER + 7 * 32], ymm0
+ vmovdqa [rsp + _XFER + 8 * 32], ymm0
+ vmovdqa [rsp + _XFER + 9 * 32], ymm0
+ vmovdqa [rsp + _XFER + 10 * 32], ymm0
+ vmovdqa [rsp + _XFER + 11 * 32], ymm0
+ vmovdqa [rsp + _XFER + 12 * 32], ymm0
+ vmovdqa [rsp + _XFER + 13 * 32], ymm0
+ vmovdqa [rsp + _XFER + 14 * 32], ymm0
+ vmovdqa [rsp + _XFER + 15 * 32], ymm0
+ xor eax, eax
+
+ mov rsp, [rsp + _RSP]
+ CFI_DEF_CFA_REGISTER(rsp)
+
+ pop r15
+ CFI_POP(r15)
+ pop r14
+ CFI_POP(r14)
+ pop r13
+ CFI_POP(r13)
+ pop r12
+ CFI_POP(r12)
+ pop rbp
+ CFI_POP(rbp)
+ pop rbx
+ CFI_POP(rbx)
+
+.Lnowork:
+ ret
+ CFI_ENDPROC()
+
+.align 64
+.LK256:
+ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+ .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+.LPSHUFFLE_BYTE_FLIP_MASK:
+ .octa 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203
+
+/* shuffle xBxA -> 00BA */
+.L_SHUF_00BA:
+ .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100
+
+/* shuffle xDxC -> DC00 */
+.L_SHUF_DC00:
+ .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF
+
+#endif
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/sha256-intel-shaext.c b/comm/third_party/libgcrypt/cipher/sha256-intel-shaext.c
new file mode 100644
index 0000000000..48c09eefe1
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha256-intel-shaext.c
@@ -0,0 +1,363 @@
+/* sha256-intel-shaext.c - SHAEXT accelerated SHA-256 transform function
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "types.h"
+
+#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
+ defined(HAVE_GCC_INLINE_ASM_SSE41) && defined(USE_SHA256) && \
+ defined(ENABLE_SHAEXT_SUPPORT)
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+# pragma GCC target("no-sse")
+#endif
+#if __clang__
+# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function)
+#endif
+
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+
+/* Two macros to be called prior to and after the use of SHA-EXT
+   instructions.  There should be no external function calls between
+   the use of these macros.  Their purpose is to make sure that the
+   SSE registers are cleared and won't reveal any information about
+   the key or the data.  */
+#ifdef __WIN64__
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define shaext_prepare_variable char win64tmp[2*16]
+# define shaext_prepare_variable_size sizeof(win64tmp)
+# define shaext_prepare() \
+ do { asm volatile ("movdqu %%xmm6, (%0)\n" \
+ "movdqu %%xmm7, (%1)\n" \
+ : \
+ : "r" (&win64tmp[0]), "r" (&win64tmp[16]) \
+ : "memory"); \
+ } while (0)
+# define shaext_cleanup(tmp0,tmp1) \
+ do { asm volatile ("movdqu (%0), %%xmm6\n" \
+ "movdqu (%1), %%xmm7\n" \
+ "pxor %%xmm0, %%xmm0\n" \
+ "pxor %%xmm1, %%xmm1\n" \
+ "pxor %%xmm2, %%xmm2\n" \
+ "pxor %%xmm3, %%xmm3\n" \
+ "pxor %%xmm4, %%xmm4\n" \
+ "pxor %%xmm5, %%xmm5\n" \
+ "movdqa %%xmm0, (%2)\n\t" \
+ "movdqa %%xmm0, (%3)\n\t" \
+ : \
+ : "r" (&win64tmp[0]), "r" (&win64tmp[16]), \
+ "r" (tmp0), "r" (tmp1) \
+ : "memory"); \
+ } while (0)
+#else
+# define shaext_prepare_variable
+# define shaext_prepare_variable_size 0
+# define shaext_prepare() do { } while (0)
+# define shaext_cleanup(tmp0,tmp1) \
+ do { asm volatile ("pxor %%xmm0, %%xmm0\n" \
+ "pxor %%xmm1, %%xmm1\n" \
+ "pxor %%xmm2, %%xmm2\n" \
+ "pxor %%xmm3, %%xmm3\n" \
+ "pxor %%xmm4, %%xmm4\n" \
+ "pxor %%xmm5, %%xmm5\n" \
+ "pxor %%xmm6, %%xmm6\n" \
+ "pxor %%xmm7, %%xmm7\n" \
+ "movdqa %%xmm0, (%0)\n\t" \
+ "movdqa %%xmm0, (%1)\n\t" \
+ : \
+ : "r" (tmp0), "r" (tmp1) \
+ : "memory"); \
+ } while (0)
+#endif
+
+typedef struct u128_s
+{
+ u32 a, b, c, d;
+} u128_t;
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ */
+unsigned int ASM_FUNC_ATTR
+_gcry_sha256_transform_intel_shaext(u32 state[8], const unsigned char *data,
+ size_t nblks)
+{
+ static const unsigned char bshuf_mask[16] __attribute__ ((aligned (16))) =
+ { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+ static const u128_t K[16] __attribute__ ((aligned (16))) =
+ {
+ { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 },
+ { 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 },
+ { 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 },
+ { 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 },
+ { 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc },
+ { 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da },
+ { 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 },
+ { 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 },
+ { 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 },
+ { 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 },
+ { 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 },
+ { 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 },
+ { 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 },
+ { 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 },
+ { 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 },
+ { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }
+ };
+ char save_buf[2 * 16 + 15];
+ char *abef_save;
+ char *cdgh_save;
+ shaext_prepare_variable;
+
+ if (nblks == 0)
+ return 0;
+
+ shaext_prepare ();
+
+ asm volatile ("" : "=r" (abef_save) : "0" (save_buf) : "memory");
+ abef_save = abef_save + (-(uintptr_t)abef_save & 15);
+ cdgh_save = abef_save + 16;
+
+ /* byteswap mask => XMM7 */
+ asm volatile ("movdqa %[mask], %%xmm7\n\t" /* Preload mask */
+ :
+ : [mask] "m" (*bshuf_mask)
+ : "memory");
+
+ /* Load state.. ABEF_SAVE => STATE0 XMM1, CDGH_STATE => STATE1 XMM2 */
+ asm volatile ("movups 16(%[state]), %%xmm1\n\t" /* HGFE (xmm=EFGH) */
+ "movups 0(%[state]), %%xmm0\n\t" /* DCBA (xmm=ABCD) */
+ "movaps %%xmm1, %%xmm2\n\t"
+ "shufps $0x11, %%xmm0, %%xmm1\n\t" /* ABEF (xmm=FEBA) */
+ "shufps $0xbb, %%xmm0, %%xmm2\n\t" /* CDGH (xmm=HGDC) */
+ :
+ : [state] "r" (state)
+ : "memory" );
+
+ /* Load message */
+ asm volatile ("movdqu 0*16(%[data]), %%xmm3\n\t"
+ "movdqu 1*16(%[data]), %%xmm4\n\t"
+ "movdqu 2*16(%[data]), %%xmm5\n\t"
+ "movdqu 3*16(%[data]), %%xmm6\n\t"
+ "pshufb %%xmm7, %%xmm3\n\t"
+ "pshufb %%xmm7, %%xmm4\n\t"
+ "pshufb %%xmm7, %%xmm5\n\t"
+ "pshufb %%xmm7, %%xmm6\n\t"
+ :
+ : [data] "r" (data)
+ : "memory" );
+ data += 64;
+
+ do
+ {
+ /* Save state */
+ asm volatile ("movdqa %%xmm1, (%[abef_save])\n\t"
+ "movdqa %%xmm2, (%[cdgh_save])\n\t"
+ :
+ : [abef_save] "r" (abef_save), [cdgh_save] "r" (cdgh_save)
+ : "memory" );
+
+ /* Round 0..3 */
+ asm volatile ("movdqa %%xmm3, %%xmm0\n\t"
+ "paddd %[constants], %%xmm0\n\t"
+ "sha256rnds2 %%xmm1, %%xmm2\n\t"
+ "psrldq $8, %%xmm0\n\t"
+ "sha256rnds2 %%xmm2, %%xmm1\n\t"
+ :
+ : [constants] "m" (K[0].a)
+ : "memory" );
+
+ /* Round 4..7 */
+ asm volatile ("movdqa %%xmm4, %%xmm0\n\t"
+ "paddd %[constants], %%xmm0\n\t"
+ "sha256rnds2 %%xmm1, %%xmm2\n\t"
+ "psrldq $8, %%xmm0\n\t"
+ "sha256rnds2 %%xmm2, %%xmm1\n\t"
+ "sha256msg1 %%xmm4, %%xmm3\n\t"
+ :
+ : [constants] "m" (K[1].a)
+ : "memory" );
+
+ /* Round 8..11 */
+ asm volatile ("movdqa %%xmm5, %%xmm0\n\t"
+ "paddd %[constants], %%xmm0\n\t"
+ "sha256rnds2 %%xmm1, %%xmm2\n\t"
+ "psrldq $8, %%xmm0\n\t"
+ "sha256rnds2 %%xmm2, %%xmm1\n\t"
+ "sha256msg1 %%xmm5, %%xmm4\n\t"
+ :
+ : [constants] "m" (K[2].a)
+ : "memory" );
+
+#define ROUND(k, MSG0, MSG1, MSG2, MSG3) \
+ asm volatile ("movdqa %%"MSG0", %%xmm0\n\t" \
+ "paddd %[constants], %%xmm0\n\t" \
+ "sha256rnds2 %%xmm1, %%xmm2\n\t" \
+ "movdqa %%"MSG0", %%xmm7\n\t" \
+ "palignr $4, %%"MSG3", %%xmm7\n\t" \
+ "paddd %%xmm7, %%"MSG1"\n\t" \
+ "sha256msg2 %%"MSG0", %%"MSG1"\n\t" \
+ "psrldq $8, %%xmm0\n\t" \
+ "sha256rnds2 %%xmm2, %%xmm1\n\t" \
+ "sha256msg1 %%"MSG0", %%"MSG3"\n\t" \
+ : \
+ : [constants] "m" (K[k].a) \
+ : "memory" )
+
+ /* Rounds 12..15 to 48..51 */
+ ROUND(3, "xmm6", "xmm3", "xmm4", "xmm5");
+ ROUND(4, "xmm3", "xmm4", "xmm5", "xmm6");
+ ROUND(5, "xmm4", "xmm5", "xmm6", "xmm3");
+ ROUND(6, "xmm5", "xmm6", "xmm3", "xmm4");
+ ROUND(7, "xmm6", "xmm3", "xmm4", "xmm5");
+ ROUND(8, "xmm3", "xmm4", "xmm5", "xmm6");
+ ROUND(9, "xmm4", "xmm5", "xmm6", "xmm3");
+ ROUND(10, "xmm5", "xmm6", "xmm3", "xmm4");
+ ROUND(11, "xmm6", "xmm3", "xmm4", "xmm5");
+ ROUND(12, "xmm3", "xmm4", "xmm5", "xmm6");
+
+ if (--nblks == 0)
+ break;
+
+ /* Round 52..55 */
+ asm volatile ("movdqa %%xmm4, %%xmm0\n\t"
+ "paddd %[constants], %%xmm0\n\t"
+ "sha256rnds2 %%xmm1, %%xmm2\n\t"
+ "movdqa %%xmm4, %%xmm7\n\t"
+ "palignr $4, %%xmm3, %%xmm7\n\t"
+ "movdqu 0*16(%[data]), %%xmm3\n\t"
+ "paddd %%xmm7, %%xmm5\n\t"
+ "sha256msg2 %%xmm4, %%xmm5\n\t"
+ "psrldq $8, %%xmm0\n\t"
+ "sha256rnds2 %%xmm2, %%xmm1\n\t"
+ :
+ : [constants] "m" (K[13].a), [data] "r" (data)
+ : "memory" );
+
+ /* Round 56..59 */
+ asm volatile ("movdqa %%xmm5, %%xmm0\n\t"
+ "paddd %[constants], %%xmm0\n\t"
+ "sha256rnds2 %%xmm1, %%xmm2\n\t"
+ "movdqa %%xmm5, %%xmm7\n\t"
+ "palignr $4, %%xmm4, %%xmm7\n\t"
+ "movdqu 1*16(%[data]), %%xmm4\n\t"
+ "paddd %%xmm7, %%xmm6\n\t"
+ "movdqa %[mask], %%xmm7\n\t" /* Reload mask */
+ "sha256msg2 %%xmm5, %%xmm6\n\t"
+ "movdqu 2*16(%[data]), %%xmm5\n\t"
+ "psrldq $8, %%xmm0\n\t"
+ "sha256rnds2 %%xmm2, %%xmm1\n\t"
+ :
+ : [constants] "m" (K[14].a), [mask] "m" (*bshuf_mask),
+ [data] "r" (data)
+ : "memory" );
+
+ /* Round 60..63 */
+ asm volatile ("movdqa %%xmm6, %%xmm0\n\t"
+ "pshufb %%xmm7, %%xmm3\n\t"
+ "movdqu 3*16(%[data]), %%xmm6\n\t"
+ "paddd %[constants], %%xmm0\n\t"
+ "pshufb %%xmm7, %%xmm4\n\t"
+ "sha256rnds2 %%xmm1, %%xmm2\n\t"
+ "psrldq $8, %%xmm0\n\t"
+ "pshufb %%xmm7, %%xmm5\n\t"
+ "sha256rnds2 %%xmm2, %%xmm1\n\t"
+ :
+ : [constants] "m" (K[15].a), [data] "r" (data)
+ : "memory" );
+ data += 64;
+
+ /* Merge states */
+ asm volatile ("paddd (%[abef_save]), %%xmm1\n\t"
+ "paddd (%[cdgh_save]), %%xmm2\n\t"
+ "pshufb %%xmm7, %%xmm6\n\t"
+ :
+ : [abef_save] "r" (abef_save), [cdgh_save] "r" (cdgh_save)
+ : "memory" );
+ }
+ while (1);
+
+ /* Round 52..55 */
+ asm volatile ("movdqa %%xmm4, %%xmm0\n\t"
+ "paddd %[constants], %%xmm0\n\t"
+ "sha256rnds2 %%xmm1, %%xmm2\n\t"
+ "movdqa %%xmm4, %%xmm7\n\t"
+ "palignr $4, %%xmm3, %%xmm7\n\t"
+ "paddd %%xmm7, %%xmm5\n\t"
+ "sha256msg2 %%xmm4, %%xmm5\n\t"
+ "psrldq $8, %%xmm0\n\t"
+ "sha256rnds2 %%xmm2, %%xmm1\n\t"
+ :
+ : [constants] "m" (K[13].a)
+ : "memory" );
+
+ /* Round 56..59 */
+ asm volatile ("movdqa %%xmm5, %%xmm0\n\t"
+ "paddd %[constants], %%xmm0\n\t"
+ "sha256rnds2 %%xmm1, %%xmm2\n\t"
+ "movdqa %%xmm5, %%xmm7\n\t"
+ "palignr $4, %%xmm4, %%xmm7\n\t"
+ "paddd %%xmm7, %%xmm6\n\t"
+ "movdqa %[mask], %%xmm7\n\t" /* Reload mask */
+ "sha256msg2 %%xmm5, %%xmm6\n\t"
+ "psrldq $8, %%xmm0\n\t"
+ "sha256rnds2 %%xmm2, %%xmm1\n\t"
+ :
+ : [constants] "m" (K[14].a), [mask] "m" (*bshuf_mask)
+ : "memory" );
+
+ /* Round 60..63 */
+ asm volatile ("movdqa %%xmm6, %%xmm0\n\t"
+ "paddd %[constants], %%xmm0\n\t"
+ "sha256rnds2 %%xmm1, %%xmm2\n\t"
+ "psrldq $8, %%xmm0\n\t"
+ "sha256rnds2 %%xmm2, %%xmm1\n\t"
+ :
+ : [constants] "m" (K[15].a)
+ : "memory" );
+
+ /* Merge states */
+ asm volatile ("paddd (%[abef_save]), %%xmm1\n\t"
+ "paddd (%[cdgh_save]), %%xmm2\n\t"
+ :
+ : [abef_save] "r" (abef_save), [cdgh_save] "r" (cdgh_save)
+ : "memory" );
+
+ /* Save state (XMM1=FEBA, XMM2=HGDC) */
+ asm volatile ("movaps %%xmm1, %%xmm0\n\t"
+ "shufps $0x11, %%xmm2, %%xmm1\n\t" /* xmm=ABCD */
+ "shufps $0xbb, %%xmm2, %%xmm0\n\t" /* xmm=EFGH */
+ "movups %%xmm1, 16(%[state])\n\t"
+ "movups %%xmm0, 0(%[state])\n\t"
+ :
+ : [state] "r" (state)
+ : "memory" );
+
+ shaext_cleanup (abef_save, cdgh_save);
+ return 0;
+}
+
+#if __clang__
+# pragma clang attribute pop
+#endif
+
+#endif /* HAVE_GCC_INLINE_ASM_SHA_EXT */
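For reference, a scalar sketch of the ABEF/CDGH packing that the shufps sequences above produce and undo; the pack_abef_cdgh helper and lane128 type are illustrative only and not part of this file:

#include <stdint.h>

/* Illustrative sketch: pack the eight SHA-256 state words (A..H in
 * state[0..7]) into the two lanes consumed by sha256rnds2.  Element 0
 * is the lowest dword of the XMM register, so the lanes read FEBA and
 * HGDC from low to high, i.e. ABEF and CDGH from high to low. */
typedef struct { uint32_t w[4]; } lane128;

static void
pack_abef_cdgh (const uint32_t state[8], lane128 *abef, lane128 *cdgh)
{
  abef->w[0] = state[5];   /* F */
  abef->w[1] = state[4];   /* E */
  abef->w[2] = state[1];   /* B */
  abef->w[3] = state[0];   /* A */
  cdgh->w[0] = state[7];   /* H */
  cdgh->w[1] = state[6];   /* G */
  cdgh->w[2] = state[3];   /* D */
  cdgh->w[3] = state[2];   /* C */
}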
diff --git a/comm/third_party/libgcrypt/cipher/sha256-ppc.c b/comm/third_party/libgcrypt/cipher/sha256-ppc.c
new file mode 100644
index 0000000000..a9b59714d2
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha256-ppc.c
@@ -0,0 +1,795 @@
+/* sha256-ppc.c - PowerPC vcrypto implementation of SHA-256 transform
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \
+ defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
+ defined(USE_SHA256) && \
+ __GNUC__ >= 4
+
+#include <altivec.h>
+#include "bufhelp.h"
+
+
+typedef vector unsigned char vector16x_u8;
+typedef vector unsigned int vector4x_u32;
+typedef vector unsigned long long vector2x_u64;
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+static const u32 K[64] =
+ {
+#define TBL(v) v
+ TBL(0x428a2f98), TBL(0x71374491), TBL(0xb5c0fbcf), TBL(0xe9b5dba5),
+ TBL(0x3956c25b), TBL(0x59f111f1), TBL(0x923f82a4), TBL(0xab1c5ed5),
+ TBL(0xd807aa98), TBL(0x12835b01), TBL(0x243185be), TBL(0x550c7dc3),
+ TBL(0x72be5d74), TBL(0x80deb1fe), TBL(0x9bdc06a7), TBL(0xc19bf174),
+ TBL(0xe49b69c1), TBL(0xefbe4786), TBL(0x0fc19dc6), TBL(0x240ca1cc),
+ TBL(0x2de92c6f), TBL(0x4a7484aa), TBL(0x5cb0a9dc), TBL(0x76f988da),
+ TBL(0x983e5152), TBL(0xa831c66d), TBL(0xb00327c8), TBL(0xbf597fc7),
+ TBL(0xc6e00bf3), TBL(0xd5a79147), TBL(0x06ca6351), TBL(0x14292967),
+ TBL(0x27b70a85), TBL(0x2e1b2138), TBL(0x4d2c6dfc), TBL(0x53380d13),
+ TBL(0x650a7354), TBL(0x766a0abb), TBL(0x81c2c92e), TBL(0x92722c85),
+ TBL(0xa2bfe8a1), TBL(0xa81a664b), TBL(0xc24b8b70), TBL(0xc76c51a3),
+ TBL(0xd192e819), TBL(0xd6990624), TBL(0xf40e3585), TBL(0x106aa070),
+ TBL(0x19a4c116), TBL(0x1e376c08), TBL(0x2748774c), TBL(0x34b0bcb5),
+ TBL(0x391c0cb3), TBL(0x4ed8aa4a), TBL(0x5b9cca4f), TBL(0x682e6ff3),
+ TBL(0x748f82ee), TBL(0x78a5636f), TBL(0x84c87814), TBL(0x8cc70208),
+ TBL(0x90befffa), TBL(0xa4506ceb), TBL(0xbef9a3f7), TBL(0xc67178f2)
+#undef TBL
+ };
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_rol_elems(vector4x_u32 v, unsigned int idx)
+{
+#ifndef WORDS_BIGENDIAN
+ return vec_sld (v, v, (16 - (4 * idx)) & 15);
+#else
+ return vec_sld (v, v, (4 * idx) & 15);
+#endif
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_merge_idx0_elems(vector4x_u32 v0, vector4x_u32 v1,
+ vector4x_u32 v2, vector4x_u32 v3)
+{
+ return (vector4x_u32)vec_mergeh ((vector2x_u64) vec_mergeh(v0, v1),
+ (vector2x_u64) vec_mergeh(v2, v3));
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_ror_u32(vector4x_u32 v, unsigned int shift)
+{
+ return (v >> (shift & 31)) ^ (v << ((32 - shift) & 31));
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_vshasigma_u32(vector4x_u32 v, unsigned int a, unsigned int b)
+{
+ asm ("vshasigmaw %0,%1,%2,%3"
+ : "=v" (v)
+ : "v" (v), "g" (a), "g" (b)
+ : "memory");
+ return v;
+}
+
+
+/* SHA2 round in vector registers */
+#define R(a,b,c,d,e,f,g,h,k,w) do \
+ { \
+ t1 = (h); \
+ t1 += ((k) + (w)); \
+ t1 += Cho((e),(f),(g)); \
+ t1 += Sum1((e)); \
+ t2 = Sum0((a)); \
+ t2 += Maj((a),(b),(c)); \
+ d += t1; \
+ h = t1 + t2; \
+ } while (0)
+
+#define Cho(b, c, d) (vec_sel(d, c, b))
+
+#define Maj(c, d, b) (vec_sel(c, b, c ^ d))
+
+#define Sum0(x) (vec_vshasigma_u32(x, 1, 0))
+
+#define Sum1(x) (vec_vshasigma_u32(x, 1, 15))
+
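The Cho and Maj macros above are expressed through vec_sel so that each maps to a single vector instruction. As a plain-C cross-check (illustrative only; it assumes vec_sel(a, b, m) returns bits of b where m is set and bits of a elsewhere), both reduce to the usual FIPS 180-4 word functions:

#include <stdint.h>

/* Cho((e),(f),(g)) == vec_sel(g, f, e): take f where e is 1, else g. */
static inline uint32_t
cho_u32 (uint32_t e, uint32_t f, uint32_t g)
{
  return (e & f) | (~e & g);
}

/* Maj((a),(b),(c)) picks, per bit, the value that occurs at least twice;
 * vec_sel(a, c, a ^ b) gives the same result: where a and b agree the
 * answer is a, where they differ the answer is c. */
static inline uint32_t
maj_u32 (uint32_t a, uint32_t b, uint32_t c)
{
  return (a & b) ^ (a & c) ^ (b & c);
}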
+
+/* Message expansion on general purpose registers */
+#define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3))
+#define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10))
+
+#define I(i) ( w[i] = buf_get_be32(data + i * 4) )
+#define W(i) ({ w[i&0x0f] += w[(i-7) &0x0f]; \
+ w[i&0x0f] += S0(w[(i-15)&0x0f]); \
+ w[i&0x0f] += S1(w[(i-2) &0x0f]); \
+ w[i&0x0f]; })
+
+#define I2(i) ( w2[i] = buf_get_be32(64 + data + i * 4), I(i) )
+#define W2(i) ({ w2[i] = w2[i-7]; \
+ w2[i] += S1(w2[i-2]); \
+ w2[i] += S0(w2[i-15]); \
+ w2[i] += w2[i-16]; \
+ W(i); })
+#define R2(i) ( w2[i] )
+
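The I/W/I2/W2 macros above implement the standard message schedule, once with a 16-word rolling window (w[]) and once fully expanded (w2[]) so the second block of an interleaved pair can be replayed with R2. A minimal stand-alone sketch of the same recurrence, written with a full 64-word array for clarity (illustrative only):

#include <stdint.h>

static inline uint32_t
ror32 (uint32_t x, unsigned int n)
{
  return (x >> n) | (x << (32 - n));
}

/* w[0..15] must already hold the big-endian-loaded message words. */
static void
sha256_expand_schedule (uint32_t w[64])
{
  int t;

  for (t = 16; t < 64; t++)
    {
      uint32_t s0 = ror32 (w[t-15], 7) ^ ror32 (w[t-15], 18) ^ (w[t-15] >> 3);
      uint32_t s1 = ror32 (w[t-2], 17) ^ ror32 (w[t-2], 19) ^ (w[t-2] >> 10);

      w[t] = w[t-16] + s0 + w[t-7] + s1;
    }
}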
+
+unsigned int ASM_FUNC_ATTR
+_gcry_sha256_transform_ppc8(u32 state[8], const unsigned char *data,
+ size_t nblks)
+{
+  /* GPRs are used for message expansion because the vector-intrinsics-based
+   * variant generates slower code. */
+ vector4x_u32 h0, h1, h2, h3, h4, h5, h6, h7;
+ vector4x_u32 h0_h3, h4_h7;
+ vector4x_u32 a, b, c, d, e, f, g, h, t1, t2;
+ u32 w[16];
+ u32 w2[64];
+
+ h0_h3 = vec_vsx_ld (4 * 0, state);
+ h4_h7 = vec_vsx_ld (4 * 4, state);
+
+ h0 = h0_h3;
+ h1 = vec_rol_elems (h0_h3, 1);
+ h2 = vec_rol_elems (h0_h3, 2);
+ h3 = vec_rol_elems (h0_h3, 3);
+ h4 = h4_h7;
+ h5 = vec_rol_elems (h4_h7, 1);
+ h6 = vec_rol_elems (h4_h7, 2);
+ h7 = vec_rol_elems (h4_h7, 3);
+
+ while (nblks >= 2)
+ {
+ a = h0;
+ b = h1;
+ c = h2;
+ d = h3;
+ e = h4;
+ f = h5;
+ g = h6;
+ h = h7;
+
+ R(a, b, c, d, e, f, g, h, K[0], I2(0));
+ R(h, a, b, c, d, e, f, g, K[1], I2(1));
+ R(g, h, a, b, c, d, e, f, K[2], I2(2));
+ R(f, g, h, a, b, c, d, e, K[3], I2(3));
+ R(e, f, g, h, a, b, c, d, K[4], I2(4));
+ R(d, e, f, g, h, a, b, c, K[5], I2(5));
+ R(c, d, e, f, g, h, a, b, K[6], I2(6));
+ R(b, c, d, e, f, g, h, a, K[7], I2(7));
+ R(a, b, c, d, e, f, g, h, K[8], I2(8));
+ R(h, a, b, c, d, e, f, g, K[9], I2(9));
+ R(g, h, a, b, c, d, e, f, K[10], I2(10));
+ R(f, g, h, a, b, c, d, e, K[11], I2(11));
+ R(e, f, g, h, a, b, c, d, K[12], I2(12));
+ R(d, e, f, g, h, a, b, c, K[13], I2(13));
+ R(c, d, e, f, g, h, a, b, K[14], I2(14));
+ R(b, c, d, e, f, g, h, a, K[15], I2(15));
+ data += 64 * 2;
+
+ R(a, b, c, d, e, f, g, h, K[16], W2(16));
+ R(h, a, b, c, d, e, f, g, K[17], W2(17));
+ R(g, h, a, b, c, d, e, f, K[18], W2(18));
+ R(f, g, h, a, b, c, d, e, K[19], W2(19));
+ R(e, f, g, h, a, b, c, d, K[20], W2(20));
+ R(d, e, f, g, h, a, b, c, K[21], W2(21));
+ R(c, d, e, f, g, h, a, b, K[22], W2(22));
+ R(b, c, d, e, f, g, h, a, K[23], W2(23));
+ R(a, b, c, d, e, f, g, h, K[24], W2(24));
+ R(h, a, b, c, d, e, f, g, K[25], W2(25));
+ R(g, h, a, b, c, d, e, f, K[26], W2(26));
+ R(f, g, h, a, b, c, d, e, K[27], W2(27));
+ R(e, f, g, h, a, b, c, d, K[28], W2(28));
+ R(d, e, f, g, h, a, b, c, K[29], W2(29));
+ R(c, d, e, f, g, h, a, b, K[30], W2(30));
+ R(b, c, d, e, f, g, h, a, K[31], W2(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W2(32));
+ R(h, a, b, c, d, e, f, g, K[33], W2(33));
+ R(g, h, a, b, c, d, e, f, K[34], W2(34));
+ R(f, g, h, a, b, c, d, e, K[35], W2(35));
+ R(e, f, g, h, a, b, c, d, K[36], W2(36));
+ R(d, e, f, g, h, a, b, c, K[37], W2(37));
+ R(c, d, e, f, g, h, a, b, K[38], W2(38));
+ R(b, c, d, e, f, g, h, a, K[39], W2(39));
+ R(a, b, c, d, e, f, g, h, K[40], W2(40));
+ R(h, a, b, c, d, e, f, g, K[41], W2(41));
+ R(g, h, a, b, c, d, e, f, K[42], W2(42));
+ R(f, g, h, a, b, c, d, e, K[43], W2(43));
+ R(e, f, g, h, a, b, c, d, K[44], W2(44));
+ R(d, e, f, g, h, a, b, c, K[45], W2(45));
+ R(c, d, e, f, g, h, a, b, K[46], W2(46));
+ R(b, c, d, e, f, g, h, a, K[47], W2(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], W2(48));
+ R(h, a, b, c, d, e, f, g, K[49], W2(49));
+ R(g, h, a, b, c, d, e, f, K[50], W2(50));
+ R(f, g, h, a, b, c, d, e, K[51], W2(51));
+ R(e, f, g, h, a, b, c, d, K[52], W2(52));
+ R(d, e, f, g, h, a, b, c, K[53], W2(53));
+ R(c, d, e, f, g, h, a, b, K[54], W2(54));
+ R(b, c, d, e, f, g, h, a, K[55], W2(55));
+ R(a, b, c, d, e, f, g, h, K[56], W2(56));
+ R(h, a, b, c, d, e, f, g, K[57], W2(57));
+ R(g, h, a, b, c, d, e, f, K[58], W2(58));
+ R(f, g, h, a, b, c, d, e, K[59], W2(59));
+ R(e, f, g, h, a, b, c, d, K[60], W2(60));
+ R(d, e, f, g, h, a, b, c, K[61], W2(61));
+ R(c, d, e, f, g, h, a, b, K[62], W2(62));
+ R(b, c, d, e, f, g, h, a, K[63], W2(63));
+
+ h0 += a;
+ h1 += b;
+ h2 += c;
+ h3 += d;
+ h4 += e;
+ h5 += f;
+ h6 += g;
+ h7 += h;
+
+ a = h0;
+ b = h1;
+ c = h2;
+ d = h3;
+ e = h4;
+ f = h5;
+ g = h6;
+ h = h7;
+
+ R(a, b, c, d, e, f, g, h, K[0], R2(0));
+ R(h, a, b, c, d, e, f, g, K[1], R2(1));
+ R(g, h, a, b, c, d, e, f, K[2], R2(2));
+ R(f, g, h, a, b, c, d, e, K[3], R2(3));
+ R(e, f, g, h, a, b, c, d, K[4], R2(4));
+ R(d, e, f, g, h, a, b, c, K[5], R2(5));
+ R(c, d, e, f, g, h, a, b, K[6], R2(6));
+ R(b, c, d, e, f, g, h, a, K[7], R2(7));
+ R(a, b, c, d, e, f, g, h, K[8], R2(8));
+ R(h, a, b, c, d, e, f, g, K[9], R2(9));
+ R(g, h, a, b, c, d, e, f, K[10], R2(10));
+ R(f, g, h, a, b, c, d, e, K[11], R2(11));
+ R(e, f, g, h, a, b, c, d, K[12], R2(12));
+ R(d, e, f, g, h, a, b, c, K[13], R2(13));
+ R(c, d, e, f, g, h, a, b, K[14], R2(14));
+ R(b, c, d, e, f, g, h, a, K[15], R2(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], R2(16));
+ R(h, a, b, c, d, e, f, g, K[17], R2(17));
+ R(g, h, a, b, c, d, e, f, K[18], R2(18));
+ R(f, g, h, a, b, c, d, e, K[19], R2(19));
+ R(e, f, g, h, a, b, c, d, K[20], R2(20));
+ R(d, e, f, g, h, a, b, c, K[21], R2(21));
+ R(c, d, e, f, g, h, a, b, K[22], R2(22));
+ R(b, c, d, e, f, g, h, a, K[23], R2(23));
+ R(a, b, c, d, e, f, g, h, K[24], R2(24));
+ R(h, a, b, c, d, e, f, g, K[25], R2(25));
+ R(g, h, a, b, c, d, e, f, K[26], R2(26));
+ R(f, g, h, a, b, c, d, e, K[27], R2(27));
+ R(e, f, g, h, a, b, c, d, K[28], R2(28));
+ R(d, e, f, g, h, a, b, c, K[29], R2(29));
+ R(c, d, e, f, g, h, a, b, K[30], R2(30));
+ R(b, c, d, e, f, g, h, a, K[31], R2(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], R2(32));
+ R(h, a, b, c, d, e, f, g, K[33], R2(33));
+ R(g, h, a, b, c, d, e, f, K[34], R2(34));
+ R(f, g, h, a, b, c, d, e, K[35], R2(35));
+ R(e, f, g, h, a, b, c, d, K[36], R2(36));
+ R(d, e, f, g, h, a, b, c, K[37], R2(37));
+ R(c, d, e, f, g, h, a, b, K[38], R2(38));
+ R(b, c, d, e, f, g, h, a, K[39], R2(39));
+ R(a, b, c, d, e, f, g, h, K[40], R2(40));
+ R(h, a, b, c, d, e, f, g, K[41], R2(41));
+ R(g, h, a, b, c, d, e, f, K[42], R2(42));
+ R(f, g, h, a, b, c, d, e, K[43], R2(43));
+ R(e, f, g, h, a, b, c, d, K[44], R2(44));
+ R(d, e, f, g, h, a, b, c, K[45], R2(45));
+ R(c, d, e, f, g, h, a, b, K[46], R2(46));
+ R(b, c, d, e, f, g, h, a, K[47], R2(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], R2(48));
+ R(h, a, b, c, d, e, f, g, K[49], R2(49));
+ R(g, h, a, b, c, d, e, f, K[50], R2(50));
+ R(f, g, h, a, b, c, d, e, K[51], R2(51));
+ R(e, f, g, h, a, b, c, d, K[52], R2(52));
+ R(d, e, f, g, h, a, b, c, K[53], R2(53));
+ R(c, d, e, f, g, h, a, b, K[54], R2(54));
+ R(b, c, d, e, f, g, h, a, K[55], R2(55));
+ R(a, b, c, d, e, f, g, h, K[56], R2(56));
+ R(h, a, b, c, d, e, f, g, K[57], R2(57));
+ R(g, h, a, b, c, d, e, f, K[58], R2(58));
+ R(f, g, h, a, b, c, d, e, K[59], R2(59));
+ R(e, f, g, h, a, b, c, d, K[60], R2(60));
+ R(d, e, f, g, h, a, b, c, K[61], R2(61));
+ R(c, d, e, f, g, h, a, b, K[62], R2(62));
+ R(b, c, d, e, f, g, h, a, K[63], R2(63));
+
+ h0 += a;
+ h1 += b;
+ h2 += c;
+ h3 += d;
+ h4 += e;
+ h5 += f;
+ h6 += g;
+ h7 += h;
+
+ nblks -= 2;
+ }
+
+ while (nblks)
+ {
+ a = h0;
+ b = h1;
+ c = h2;
+ d = h3;
+ e = h4;
+ f = h5;
+ g = h6;
+ h = h7;
+
+ R(a, b, c, d, e, f, g, h, K[0], I(0));
+ R(h, a, b, c, d, e, f, g, K[1], I(1));
+ R(g, h, a, b, c, d, e, f, K[2], I(2));
+ R(f, g, h, a, b, c, d, e, K[3], I(3));
+ R(e, f, g, h, a, b, c, d, K[4], I(4));
+ R(d, e, f, g, h, a, b, c, K[5], I(5));
+ R(c, d, e, f, g, h, a, b, K[6], I(6));
+ R(b, c, d, e, f, g, h, a, K[7], I(7));
+ R(a, b, c, d, e, f, g, h, K[8], I(8));
+ R(h, a, b, c, d, e, f, g, K[9], I(9));
+ R(g, h, a, b, c, d, e, f, K[10], I(10));
+ R(f, g, h, a, b, c, d, e, K[11], I(11));
+ R(e, f, g, h, a, b, c, d, K[12], I(12));
+ R(d, e, f, g, h, a, b, c, K[13], I(13));
+ R(c, d, e, f, g, h, a, b, K[14], I(14));
+ R(b, c, d, e, f, g, h, a, K[15], I(15));
+ data += 64;
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], W(48));
+ R(h, a, b, c, d, e, f, g, K[49], W(49));
+ R(g, h, a, b, c, d, e, f, K[50], W(50));
+ R(f, g, h, a, b, c, d, e, K[51], W(51));
+ R(e, f, g, h, a, b, c, d, K[52], W(52));
+ R(d, e, f, g, h, a, b, c, K[53], W(53));
+ R(c, d, e, f, g, h, a, b, K[54], W(54));
+ R(b, c, d, e, f, g, h, a, K[55], W(55));
+ R(a, b, c, d, e, f, g, h, K[56], W(56));
+ R(h, a, b, c, d, e, f, g, K[57], W(57));
+ R(g, h, a, b, c, d, e, f, K[58], W(58));
+ R(f, g, h, a, b, c, d, e, K[59], W(59));
+ R(e, f, g, h, a, b, c, d, K[60], W(60));
+ R(d, e, f, g, h, a, b, c, K[61], W(61));
+ R(c, d, e, f, g, h, a, b, K[62], W(62));
+ R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+ h0 += a;
+ h1 += b;
+ h2 += c;
+ h3 += d;
+ h4 += e;
+ h5 += f;
+ h6 += g;
+ h7 += h;
+
+ nblks--;
+ }
+
+ h0_h3 = vec_merge_idx0_elems (h0, h1, h2, h3);
+ h4_h7 = vec_merge_idx0_elems (h4, h5, h6, h7);
+ vec_vsx_st (h0_h3, 4 * 0, state);
+ vec_vsx_st (h4_h7, 4 * 4, state);
+
+ return sizeof(w2) + sizeof(w);
+}
+#undef R
+#undef Cho
+#undef Maj
+#undef Sum0
+#undef Sum1
+#undef S0
+#undef S1
+#undef I
+#undef W
+#undef I2
+#undef W2
+#undef R2
+
+
+/* SHA2 round in general purpose registers */
+#define R(a,b,c,d,e,f,g,h,k,w) do \
+ { \
+ t1 = (h) + Sum1((e)) + Cho((e),(f),(g)) + ((k) + (w));\
+ t2 = Sum0((a)) + Maj((a),(b),(c)); \
+ d += t1; \
+ h = t1 + t2; \
+ } while (0)
+
+#define Cho(x, y, z) ((x & y) + (~x & z))
+
+#define Maj(z, x, y) ((x & y) + (z & (x ^ y)))
+
+#define Sum0(x) (ror (x, 2) ^ ror (x ^ ror (x, 22-13), 13))
+
+#define Sum1(x) (ror (x, 6) ^ ror (x, 11) ^ ror (x, 25))
+
+
+/* Message expansion on general purpose registers */
+#define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3))
+#define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10))
+
+#define I(i) ( w[i] = buf_get_be32(data + i * 4) )
+#define WN(i) ({ w[i&0x0f] += w[(i-7) &0x0f]; \
+ w[i&0x0f] += S0(w[(i-15)&0x0f]); \
+ w[i&0x0f] += S1(w[(i-2) &0x0f]); \
+ w[i&0x0f]; })
+#define W(i) ({ u32 r = w[i&0x0f]; WN(i); r; })
+#define L(i) w[i&0x0f]
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_sha256_transform_ppc9(u32 state[8], const unsigned char *data,
+ size_t nblks)
+{
+  /* GPRs are used for the round function and message expansion because the
+   * vector-intrinsics-based variant generates slower code on POWER9. */
+ u32 a, b, c, d, e, f, g, h, t1, t2;
+ u32 w[16];
+
+ a = state[0];
+ b = state[1];
+ c = state[2];
+ d = state[3];
+ e = state[4];
+ f = state[5];
+ g = state[6];
+ h = state[7];
+
+ while (nblks >= 2)
+ {
+ I(0); I(1); I(2); I(3);
+ I(4); I(5); I(6); I(7);
+ I(8); I(9); I(10); I(11);
+ I(12); I(13); I(14); I(15);
+ data += 64;
+ R(a, b, c, d, e, f, g, h, K[0], W(0));
+ R(h, a, b, c, d, e, f, g, K[1], W(1));
+ R(g, h, a, b, c, d, e, f, K[2], W(2));
+ R(f, g, h, a, b, c, d, e, K[3], W(3));
+ R(e, f, g, h, a, b, c, d, K[4], W(4));
+ R(d, e, f, g, h, a, b, c, K[5], W(5));
+ R(c, d, e, f, g, h, a, b, K[6], W(6));
+ R(b, c, d, e, f, g, h, a, K[7], W(7));
+ R(a, b, c, d, e, f, g, h, K[8], W(8));
+ R(h, a, b, c, d, e, f, g, K[9], W(9));
+ R(g, h, a, b, c, d, e, f, K[10], W(10));
+ R(f, g, h, a, b, c, d, e, K[11], W(11));
+ R(e, f, g, h, a, b, c, d, K[12], W(12));
+ R(d, e, f, g, h, a, b, c, K[13], W(13));
+ R(c, d, e, f, g, h, a, b, K[14], W(14));
+ R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], L(48));
+ R(h, a, b, c, d, e, f, g, K[49], L(49));
+ R(g, h, a, b, c, d, e, f, K[50], L(50));
+ R(f, g, h, a, b, c, d, e, K[51], L(51));
+ I(0); I(1); I(2); I(3);
+ R(e, f, g, h, a, b, c, d, K[52], L(52));
+ R(d, e, f, g, h, a, b, c, K[53], L(53));
+ R(c, d, e, f, g, h, a, b, K[54], L(54));
+ R(b, c, d, e, f, g, h, a, K[55], L(55));
+ I(4); I(5); I(6); I(7);
+ R(a, b, c, d, e, f, g, h, K[56], L(56));
+ R(h, a, b, c, d, e, f, g, K[57], L(57));
+ R(g, h, a, b, c, d, e, f, K[58], L(58));
+ R(f, g, h, a, b, c, d, e, K[59], L(59));
+ I(8); I(9); I(10); I(11);
+ R(e, f, g, h, a, b, c, d, K[60], L(60));
+ R(d, e, f, g, h, a, b, c, K[61], L(61));
+ R(c, d, e, f, g, h, a, b, K[62], L(62));
+ R(b, c, d, e, f, g, h, a, K[63], L(63));
+ I(12); I(13); I(14); I(15);
+ data += 64;
+
+ a += state[0];
+ b += state[1];
+ c += state[2];
+ d += state[3];
+ e += state[4];
+ f += state[5];
+ g += state[6];
+ h += state[7];
+ state[0] = a;
+ state[1] = b;
+ state[2] = c;
+ state[3] = d;
+ state[4] = e;
+ state[5] = f;
+ state[6] = g;
+ state[7] = h;
+
+ R(a, b, c, d, e, f, g, h, K[0], W(0));
+ R(h, a, b, c, d, e, f, g, K[1], W(1));
+ R(g, h, a, b, c, d, e, f, K[2], W(2));
+ R(f, g, h, a, b, c, d, e, K[3], W(3));
+ R(e, f, g, h, a, b, c, d, K[4], W(4));
+ R(d, e, f, g, h, a, b, c, K[5], W(5));
+ R(c, d, e, f, g, h, a, b, K[6], W(6));
+ R(b, c, d, e, f, g, h, a, K[7], W(7));
+ R(a, b, c, d, e, f, g, h, K[8], W(8));
+ R(h, a, b, c, d, e, f, g, K[9], W(9));
+ R(g, h, a, b, c, d, e, f, K[10], W(10));
+ R(f, g, h, a, b, c, d, e, K[11], W(11));
+ R(e, f, g, h, a, b, c, d, K[12], W(12));
+ R(d, e, f, g, h, a, b, c, K[13], W(13));
+ R(c, d, e, f, g, h, a, b, K[14], W(14));
+ R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], L(48));
+ R(h, a, b, c, d, e, f, g, K[49], L(49));
+ R(g, h, a, b, c, d, e, f, K[50], L(50));
+ R(f, g, h, a, b, c, d, e, K[51], L(51));
+ R(e, f, g, h, a, b, c, d, K[52], L(52));
+ R(d, e, f, g, h, a, b, c, K[53], L(53));
+ R(c, d, e, f, g, h, a, b, K[54], L(54));
+ R(b, c, d, e, f, g, h, a, K[55], L(55));
+ R(a, b, c, d, e, f, g, h, K[56], L(56));
+ R(h, a, b, c, d, e, f, g, K[57], L(57));
+ R(g, h, a, b, c, d, e, f, K[58], L(58));
+ R(f, g, h, a, b, c, d, e, K[59], L(59));
+ R(e, f, g, h, a, b, c, d, K[60], L(60));
+ R(d, e, f, g, h, a, b, c, K[61], L(61));
+ R(c, d, e, f, g, h, a, b, K[62], L(62));
+ R(b, c, d, e, f, g, h, a, K[63], L(63));
+
+ a += state[0];
+ b += state[1];
+ c += state[2];
+ d += state[3];
+ e += state[4];
+ f += state[5];
+ g += state[6];
+ h += state[7];
+ state[0] = a;
+ state[1] = b;
+ state[2] = c;
+ state[3] = d;
+ state[4] = e;
+ state[5] = f;
+ state[6] = g;
+ state[7] = h;
+
+ nblks -= 2;
+ }
+
+ while (nblks)
+ {
+ I(0); I(1); I(2); I(3);
+ I(4); I(5); I(6); I(7);
+ I(8); I(9); I(10); I(11);
+ I(12); I(13); I(14); I(15);
+ data += 64;
+ R(a, b, c, d, e, f, g, h, K[0], W(0));
+ R(h, a, b, c, d, e, f, g, K[1], W(1));
+ R(g, h, a, b, c, d, e, f, K[2], W(2));
+ R(f, g, h, a, b, c, d, e, K[3], W(3));
+ R(e, f, g, h, a, b, c, d, K[4], W(4));
+ R(d, e, f, g, h, a, b, c, K[5], W(5));
+ R(c, d, e, f, g, h, a, b, K[6], W(6));
+ R(b, c, d, e, f, g, h, a, K[7], W(7));
+ R(a, b, c, d, e, f, g, h, K[8], W(8));
+ R(h, a, b, c, d, e, f, g, K[9], W(9));
+ R(g, h, a, b, c, d, e, f, K[10], W(10));
+ R(f, g, h, a, b, c, d, e, K[11], W(11));
+ R(e, f, g, h, a, b, c, d, K[12], W(12));
+ R(d, e, f, g, h, a, b, c, K[13], W(13));
+ R(c, d, e, f, g, h, a, b, K[14], W(14));
+ R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], L(48));
+ R(h, a, b, c, d, e, f, g, K[49], L(49));
+ R(g, h, a, b, c, d, e, f, K[50], L(50));
+ R(f, g, h, a, b, c, d, e, K[51], L(51));
+ R(e, f, g, h, a, b, c, d, K[52], L(52));
+ R(d, e, f, g, h, a, b, c, K[53], L(53));
+ R(c, d, e, f, g, h, a, b, K[54], L(54));
+ R(b, c, d, e, f, g, h, a, K[55], L(55));
+ R(a, b, c, d, e, f, g, h, K[56], L(56));
+ R(h, a, b, c, d, e, f, g, K[57], L(57));
+ R(g, h, a, b, c, d, e, f, K[58], L(58));
+ R(f, g, h, a, b, c, d, e, K[59], L(59));
+ R(e, f, g, h, a, b, c, d, K[60], L(60));
+ R(d, e, f, g, h, a, b, c, K[61], L(61));
+ R(c, d, e, f, g, h, a, b, K[62], L(62));
+ R(b, c, d, e, f, g, h, a, K[63], L(63));
+
+ a += state[0];
+ b += state[1];
+ c += state[2];
+ d += state[3];
+ e += state[4];
+ f += state[5];
+ g += state[6];
+ h += state[7];
+ state[0] = a;
+ state[1] = b;
+ state[2] = c;
+ state[3] = d;
+ state[4] = e;
+ state[5] = f;
+ state[6] = g;
+ state[7] = h;
+
+ nblks--;
+ }
+
+ return sizeof(w);
+}
+
+#endif /* ENABLE_PPC_CRYPTO_SUPPORT */
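In _gcry_sha256_transform_ppc8 each of the eight working vectors h0..h7 carries its state word in element 0; vec_rol_elems sets that up after the load and vec_merge_idx0_elems gathers the words back for the final store. A rough scalar model of that bookkeeping (illustrative only; the element indices follow the big-endian view, and the shift adjustments inside vec_rol_elems are what compensate on little-endian targets):

#include <stdint.h>

typedef struct { uint32_t e[4]; } v4u32;

/* Model of vec_rol_elems: rotate the four elements left by idx. */
static v4u32
rol_elems (v4u32 v, unsigned int idx)
{
  v4u32 r;
  unsigned int i;

  for (i = 0; i < 4; i++)
    r.e[i] = v.e[(i + idx) % 4];
  return r;
}

/* Model of vec_merge_idx0_elems: gather element 0 of four vectors. */
static v4u32
merge_idx0 (v4u32 v0, v4u32 v1, v4u32 v2, v4u32 v3)
{
  v4u32 r;

  r.e[0] = v0.e[0];
  r.e[1] = v1.e[0];
  r.e[2] = v2.e[0];
  r.e[3] = v3.e[0];
  return r;
}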
diff --git a/comm/third_party/libgcrypt/cipher/sha256-ssse3-amd64.S b/comm/third_party/libgcrypt/cipher/sha256-ssse3-amd64.S
new file mode 100644
index 0000000000..098b0eb641
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha256-ssse3-amd64.S
@@ -0,0 +1,528 @@
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright (c) 2012, Intel Corporation
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the
+; distribution.
+;
+; * Neither the name of the Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived from
+; this software without specific prior written permission.
+;
+;
+; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; This code is described in an Intel White-Paper:
+; "Fast SHA-256 Implementations on Intel Architecture Processors"
+;
+; To find it, surf to http://www.intel.com/p/en_US/embedded
+; and search for that title.
+; The paper is expected to be released roughly at the end of April, 2012
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This code schedules 1 block at a time, with 4 lanes per block
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ * by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Note: the original implementation was named SHA256-SSE4; however, only SSSE3
+ * is required.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA256)
+
+#include "asm-common-amd64.h"
+
+.intel_syntax noprefix
+
+#define MOVDQ movdqu /* assume buffers not aligned */
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros*/
+
+/* addm [mem], reg
+ * Add reg to mem using reg-mem add and store */
+#define addm(p1, p2) \
+ add p2, p1; \
+ mov p1, p2;
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
+
+/* COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask
+ * Load xmm with mem and byte swap each dword */
+#define COPY_XMM_AND_BSWAP(p1, p2, p3) \
+ MOVDQ p1, p2; \
+ pshufb p1, p3;
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
+
+#define X0 xmm4
+#define X1 xmm5
+#define X2 xmm6
+#define X3 xmm7
+
+#define XTMP0 xmm0
+#define XTMP1 xmm1
+#define XTMP2 xmm2
+#define XTMP3 xmm3
+#define XTMP4 xmm8
+#define XFER xmm9
+
+#define SHUF_00BA xmm10 /* shuffle xBxA -> 00BA */
+#define SHUF_DC00 xmm11 /* shuffle xDxC -> DC00 */
+#define BYTE_FLIP_MASK xmm12
+
+#define NUM_BLKS rdx /* 3rd arg */
+#define CTX rsi /* 2nd arg */
+#define INP rdi /* 1st arg */
+
+#define SRND rdi /* clobbers INP */
+#define c ecx
+#define d r8d
+#define e edx
+
+#define TBL rbp
+#define a eax
+#define b ebx
+
+#define f r9d
+#define g r10d
+#define h r11d
+
+#define y0 r13d
+#define y1 r14d
+#define y2 r15d
+
+
+
+#define _INP_END_SIZE 8
+#define _INP_SIZE 8
+#define _XFER_SIZE 8
+#define _XMM_SAVE_SIZE 0
+/* STACK_SIZE plus pushes must be an odd multiple of 8 */
+#define _ALIGN_SIZE 8
+
+#define _INP_END 0
+#define _INP (_INP_END + _INP_END_SIZE)
+#define _XFER (_INP + _INP_SIZE)
+#define _XMM_SAVE (_XFER + _XFER_SIZE + _ALIGN_SIZE)
+#define STACK_SIZE (_XMM_SAVE + _XMM_SAVE_SIZE)
+
+
+#define FOUR_ROUNDS_AND_SCHED_0(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+ /* compute s0 four at a time and s1 two at a time */; \
+ /* compute W[-16] + W[-7] 4 at a time */; \
+ movdqa XTMP0, X3; \
+ mov y0, e /* y0 = e */; \
+ ror y0, (25-11) /* y0 = e >> (25-11) */; \
+ mov y1, a /* y1 = a */; \
+ palignr XTMP0, X2, 4 /* XTMP0 = W[-7] */; \
+ ror y1, (22-13) /* y1 = a >> (22-13) */; \
+ xor y0, e /* y0 = e ^ (e >> (25-11)) */; \
+ mov y2, f /* y2 = f */; \
+ ror y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+ movdqa XTMP1, X1; \
+ xor y1, a /* y1 = a ^ (a >> (22-13) */; \
+ xor y2, g /* y2 = f^g */; \
+ paddd XTMP0, X0 /* XTMP0 = W[-7] + W[-16] */; \
+ xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+ and y2, e /* y2 = (f^g)&e */; \
+ ror y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+ /* compute s0 */; \
+ palignr XTMP1, X0, 4 /* XTMP1 = W[-15] */; \
+ xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+	ror	y0, 6		/* y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) */; \
+ xor y2, g /* y2 = CH = ((f^g)&e)^g */; \
+ movdqa XTMP2, XTMP1 /* XTMP2 = W[-15] */; \
+ ror y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+ add y2, y0 /* y2 = S1 + CH */; \
+ add y2, [rsp + _XFER + 0*4] /* y2 = k + w + S1 + CH */; \
+ movdqa XTMP3, XTMP1 /* XTMP3 = W[-15] */; \
+ mov y0, a /* y0 = a */; \
+ add h, y2 /* h = h + S1 + CH + k + w */; \
+ mov y2, a /* y2 = a */; \
+ pslld XTMP1, (32-7); \
+ or y0, c /* y0 = a|c */; \
+ add d, h /* d = d + h + S1 + CH + k + w */; \
+ and y2, c /* y2 = a&c */; \
+ psrld XTMP2, 7; \
+ and y0, b /* y0 = (a|c)&b */; \
+ add h, y1 /* h = h + S1 + CH + k + w + S0 */; \
+ por XTMP1, XTMP2 /* XTMP1 = W[-15] ror 7 */; \
+ or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+ lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED_1(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+ movdqa XTMP2, XTMP3 /* XTMP2 = W[-15] */; \
+ mov y0, e /* y0 = e */; \
+ mov y1, a /* y1 = a */; \
+ movdqa XTMP4, XTMP3 /* XTMP4 = W[-15] */; \
+ ror y0, (25-11) /* y0 = e >> (25-11) */; \
+ xor y0, e /* y0 = e ^ (e >> (25-11)) */; \
+ mov y2, f /* y2 = f */; \
+ ror y1, (22-13) /* y1 = a >> (22-13) */; \
+ pslld XTMP3, (32-18); \
+ xor y1, a /* y1 = a ^ (a >> (22-13) */; \
+ ror y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+ xor y2, g /* y2 = f^g */; \
+ psrld XTMP2, 18; \
+ ror y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+ xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+ and y2, e /* y2 = (f^g)&e */; \
+	ror	y0, 6		/* y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) */; \
+ pxor XTMP1, XTMP3; \
+ xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+ xor y2, g /* y2 = CH = ((f^g)&e)^g */; \
+ psrld XTMP4, 3 /* XTMP4 = W[-15] >> 3 */; \
+ add y2, y0 /* y2 = S1 + CH */; \
+ add y2, [rsp + _XFER + 1*4] /* y2 = k + w + S1 + CH */; \
+ ror y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+ pxor XTMP1, XTMP2 /* XTMP1 = W[-15] ror 7 ^ W[-15] ror 18 */; \
+ mov y0, a /* y0 = a */; \
+ add h, y2 /* h = h + S1 + CH + k + w */; \
+ mov y2, a /* y2 = a */; \
+ pxor XTMP1, XTMP4 /* XTMP1 = s0 */; \
+ or y0, c /* y0 = a|c */; \
+ add d, h /* d = d + h + S1 + CH + k + w */; \
+ and y2, c /* y2 = a&c */; \
+ /* compute low s1 */; \
+ pshufd XTMP2, X3, 0b11111010 /* XTMP2 = W[-2] {BBAA} */; \
+ and y0, b /* y0 = (a|c)&b */; \
+ add h, y1 /* h = h + S1 + CH + k + w + S0 */; \
+ paddd XTMP0, XTMP1 /* XTMP0 = W[-16] + W[-7] + s0 */; \
+ or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+ lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED_2(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+ movdqa XTMP3, XTMP2 /* XTMP3 = W[-2] {BBAA} */; \
+ mov y0, e /* y0 = e */; \
+ mov y1, a /* y1 = a */; \
+ ror y0, (25-11) /* y0 = e >> (25-11) */; \
+ movdqa XTMP4, XTMP2 /* XTMP4 = W[-2] {BBAA} */; \
+ xor y0, e /* y0 = e ^ (e >> (25-11)) */; \
+ ror y1, (22-13) /* y1 = a >> (22-13) */; \
+ mov y2, f /* y2 = f */; \
+ xor y1, a /* y1 = a ^ (a >> (22-13) */; \
+ ror y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+ psrlq XTMP2, 17 /* XTMP2 = W[-2] ror 17 {xBxA} */; \
+ xor y2, g /* y2 = f^g */; \
+ psrlq XTMP3, 19 /* XTMP3 = W[-2] ror 19 {xBxA} */; \
+ xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+ and y2, e /* y2 = (f^g)&e */; \
+ psrld XTMP4, 10 /* XTMP4 = W[-2] >> 10 {BBAA} */; \
+ ror y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+ xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+ xor y2, g /* y2 = CH = ((f^g)&e)^g */; \
+	ror	y0, 6		/* y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) */; \
+ pxor XTMP2, XTMP3; \
+ add y2, y0 /* y2 = S1 + CH */; \
+ ror y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+ add y2, [rsp + _XFER + 2*4] /* y2 = k + w + S1 + CH */; \
+ pxor XTMP4, XTMP2 /* XTMP4 = s1 {xBxA} */; \
+ mov y0, a /* y0 = a */; \
+ add h, y2 /* h = h + S1 + CH + k + w */; \
+ mov y2, a /* y2 = a */; \
+ pshufb XTMP4, SHUF_00BA /* XTMP4 = s1 {00BA} */; \
+ or y0, c /* y0 = a|c */; \
+ add d, h /* d = d + h + S1 + CH + k + w */; \
+ and y2, c /* y2 = a&c */; \
+ paddd XTMP0, XTMP4 /* XTMP0 = {..., ..., W[1], W[0]} */; \
+ and y0, b /* y0 = (a|c)&b */; \
+ add h, y1 /* h = h + S1 + CH + k + w + S0 */; \
+ /* compute high s1 */; \
+ pshufd XTMP2, XTMP0, 0b01010000 /* XTMP2 = W[-2] {DDCC} */; \
+ or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+ lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED_3(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+ movdqa XTMP3, XTMP2 /* XTMP3 = W[-2] {DDCC} */; \
+ mov y0, e /* y0 = e */; \
+ ror y0, (25-11) /* y0 = e >> (25-11) */; \
+ mov y1, a /* y1 = a */; \
+ movdqa X0, XTMP2 /* X0 = W[-2] {DDCC} */; \
+ ror y1, (22-13) /* y1 = a >> (22-13) */; \
+ xor y0, e /* y0 = e ^ (e >> (25-11)) */; \
+ mov y2, f /* y2 = f */; \
+ ror y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+ psrlq XTMP2, 17 /* XTMP2 = W[-2] ror 17 {xDxC} */; \
+ xor y1, a /* y1 = a ^ (a >> (22-13) */; \
+ xor y2, g /* y2 = f^g */; \
+ psrlq XTMP3, 19 /* XTMP3 = W[-2] ror 19 {xDxC} */; \
+ xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+ and y2, e /* y2 = (f^g)&e */; \
+ ror y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+ psrld X0, 10 /* X0 = W[-2] >> 10 {DDCC} */; \
+ xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+	ror	y0, 6		/* y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) */; \
+ xor y2, g /* y2 = CH = ((f^g)&e)^g */; \
+ pxor XTMP2, XTMP3; \
+ ror y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+ add y2, y0 /* y2 = S1 + CH */; \
+ add y2, [rsp + _XFER + 3*4] /* y2 = k + w + S1 + CH */; \
+ pxor X0, XTMP2 /* X0 = s1 {xDxC} */; \
+ mov y0, a /* y0 = a */; \
+ add h, y2 /* h = h + S1 + CH + k + w */; \
+ mov y2, a /* y2 = a */; \
+ pshufb X0, SHUF_DC00 /* X0 = s1 {DC00} */; \
+ or y0, c /* y0 = a|c */; \
+ add d, h /* d = d + h + S1 + CH + k + w */; \
+ and y2, c /* y2 = a&c */; \
+ paddd X0, XTMP0 /* X0 = {W[3], W[2], W[1], W[0]} */; \
+ and y0, b /* y0 = (a|c)&b */; \
+ add h, y1 /* h = h + S1 + CH + k + w + S0 */; \
+ or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+ lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+ FOUR_ROUNDS_AND_SCHED_0(X0, X1, X2, X3, a, b, c, d, e, f, g, h); \
+ FOUR_ROUNDS_AND_SCHED_1(X0, X1, X2, X3, h, a, b, c, d, e, f, g); \
+ FOUR_ROUNDS_AND_SCHED_2(X0, X1, X2, X3, g, h, a, b, c, d, e, f); \
+ FOUR_ROUNDS_AND_SCHED_3(X0, X1, X2, X3, f, g, h, a, b, c, d, e);
+
+/* input is [rsp + _XFER + %1 * 4] */
+#define DO_ROUND(i1, a, b, c, d, e, f, g, h) \
+ mov y0, e /* y0 = e */; \
+ ror y0, (25-11) /* y0 = e >> (25-11) */; \
+ mov y1, a /* y1 = a */; \
+ xor y0, e /* y0 = e ^ (e >> (25-11)) */; \
+ ror y1, (22-13) /* y1 = a >> (22-13) */; \
+ mov y2, f /* y2 = f */; \
+ xor y1, a /* y1 = a ^ (a >> (22-13) */; \
+ ror y0, (11-6) /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+ xor y2, g /* y2 = f^g */; \
+ xor y0, e /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+ ror y1, (13-2) /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+ and y2, e /* y2 = (f^g)&e */; \
+ xor y1, a /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+	ror	y0, 6		/* y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) */; \
+ xor y2, g /* y2 = CH = ((f^g)&e)^g */; \
+ add y2, y0 /* y2 = S1 + CH */; \
+ ror y1, 2 /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+ add y2, [rsp + _XFER + i1 * 4] /* y2 = k + w + S1 + CH */; \
+ mov y0, a /* y0 = a */; \
+ add h, y2 /* h = h + S1 + CH + k + w */; \
+ mov y2, a /* y2 = a */; \
+ or y0, c /* y0 = a|c */; \
+ add d, h /* d = d + h + S1 + CH + k + w */; \
+ and y2, c /* y2 = a&c */; \
+ and y0, b /* y0 = (a|c)&b */; \
+ add h, y1 /* h = h + S1 + CH + k + w + S0 */; \
+ or y0, y2 /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+ lea h, [h + y0] /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; void sha256_sse4(void *input_data, UINT32 digest[8], UINT64 num_blks)
+;; arg 1 : pointer to input data
+;; arg 2 : pointer to digest
+;; arg 3 : Num blocks
+*/
+.text
+.globl _gcry_sha256_transform_amd64_ssse3
+ELF(.type _gcry_sha256_transform_amd64_ssse3,@function;)
+.align 16
+_gcry_sha256_transform_amd64_ssse3:
+ CFI_STARTPROC()
+ push rbx
+ CFI_PUSH(rbx)
+ push rbp
+ CFI_PUSH(rbp)
+ push r13
+ CFI_PUSH(r13)
+ push r14
+ CFI_PUSH(r14)
+ push r15
+ CFI_PUSH(r15)
+
+ sub rsp, STACK_SIZE
+ CFI_ADJUST_CFA_OFFSET(STACK_SIZE);
+
+ shl NUM_BLKS, 6 /* convert to bytes */
+ jz .Ldone_hash
+ add NUM_BLKS, INP /* pointer to end of data */
+ mov [rsp + _INP_END], NUM_BLKS
+
+ /* load initial digest */
+ mov a,[4*0 + CTX]
+ mov b,[4*1 + CTX]
+ mov c,[4*2 + CTX]
+ mov d,[4*3 + CTX]
+ mov e,[4*4 + CTX]
+ mov f,[4*5 + CTX]
+ mov g,[4*6 + CTX]
+ mov h,[4*7 + CTX]
+
+ movdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP]
+ movdqa SHUF_00BA, [.L_SHUF_00BA ADD_RIP]
+ movdqa SHUF_DC00, [.L_SHUF_DC00 ADD_RIP]
+
+.Loop0:
+ lea TBL, [.LK256 ADD_RIP]
+
+ /* byte swap first 16 dwords */
+ COPY_XMM_AND_BSWAP(X0, [INP + 0*16], BYTE_FLIP_MASK)
+ COPY_XMM_AND_BSWAP(X1, [INP + 1*16], BYTE_FLIP_MASK)
+ COPY_XMM_AND_BSWAP(X2, [INP + 2*16], BYTE_FLIP_MASK)
+ COPY_XMM_AND_BSWAP(X3, [INP + 3*16], BYTE_FLIP_MASK)
+
+ mov [rsp + _INP], INP
+
+ /* schedule 48 input dwords, by doing 3 rounds of 16 each */
+ mov SRND, 3
+.align 16
+.Loop1:
+ movdqa XFER, [TBL + 0*16]
+ paddd XFER, X0
+ movdqa [rsp + _XFER], XFER
+ FOUR_ROUNDS_AND_SCHED(X0, X1, X2, X3, a, b, c, d, e, f, g, h)
+
+ movdqa XFER, [TBL + 1*16]
+ paddd XFER, X1
+ movdqa [rsp + _XFER], XFER
+ FOUR_ROUNDS_AND_SCHED(X1, X2, X3, X0, e, f, g, h, a, b, c, d)
+
+ movdqa XFER, [TBL + 2*16]
+ paddd XFER, X2
+ movdqa [rsp + _XFER], XFER
+ FOUR_ROUNDS_AND_SCHED(X2, X3, X0, X1, a, b, c, d, e, f, g, h)
+
+ movdqa XFER, [TBL + 3*16]
+ paddd XFER, X3
+ movdqa [rsp + _XFER], XFER
+ add TBL, 4*16
+ FOUR_ROUNDS_AND_SCHED(X3, X0, X1, X2, e, f, g, h, a, b, c, d)
+
+ sub SRND, 1
+ jne .Loop1
+
+ mov SRND, 2
+.Loop2:
+ paddd X0, [TBL + 0*16]
+ movdqa [rsp + _XFER], X0
+ DO_ROUND(0, a, b, c, d, e, f, g, h)
+ DO_ROUND(1, h, a, b, c, d, e, f, g)
+ DO_ROUND(2, g, h, a, b, c, d, e, f)
+ DO_ROUND(3, f, g, h, a, b, c, d, e)
+ paddd X1, [TBL + 1*16]
+ movdqa [rsp + _XFER], X1
+ add TBL, 2*16
+ DO_ROUND(0, e, f, g, h, a, b, c, d)
+ DO_ROUND(1, d, e, f, g, h, a, b, c)
+ DO_ROUND(2, c, d, e, f, g, h, a, b)
+ DO_ROUND(3, b, c, d, e, f, g, h, a)
+
+ movdqa X0, X2
+ movdqa X1, X3
+
+ sub SRND, 1
+ jne .Loop2
+
+ addm([4*0 + CTX],a)
+ addm([4*1 + CTX],b)
+ addm([4*2 + CTX],c)
+ addm([4*3 + CTX],d)
+ addm([4*4 + CTX],e)
+ addm([4*5 + CTX],f)
+ addm([4*6 + CTX],g)
+ addm([4*7 + CTX],h)
+
+ mov INP, [rsp + _INP]
+ add INP, 64
+ cmp INP, [rsp + _INP_END]
+ jne .Loop0
+
+ pxor xmm0, xmm0
+ pxor xmm1, xmm1
+ pxor xmm2, xmm2
+ pxor xmm3, xmm3
+ pxor xmm4, xmm4
+ pxor xmm5, xmm5
+ pxor xmm6, xmm6
+ pxor xmm7, xmm7
+ pxor xmm8, xmm8
+ pxor xmm9, xmm9
+ pxor xmm10, xmm10
+ pxor xmm11, xmm11
+ pxor xmm12, xmm12
+
+.Ldone_hash:
+ pxor XFER, XFER
+ movdqa [rsp + _XFER], XFER
+ xor eax, eax
+
+ add rsp, STACK_SIZE
+ CFI_ADJUST_CFA_OFFSET(-STACK_SIZE);
+
+ pop r15
+ CFI_POP(r15)
+ pop r14
+ CFI_POP(r14)
+ pop r13
+ CFI_POP(r13)
+ pop rbp
+ CFI_POP(rbp)
+ pop rbx
+ CFI_POP(rbx)
+
+ ret
+ CFI_ENDPROC()
+
+
+.align 16
+.LK256:
+ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+.LPSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203
+
+/* shuffle xBxA -> 00BA */
+.L_SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100
+
+/* shuffle xDxC -> DC00 */
+.L_SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
+
+#endif
+#endif
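The scalar halves of DO_ROUND and FOUR_ROUNDS_AND_SCHED above compute MAJ as ((a|c)&b)|(a&c) so it interleaves well with the message-schedule instructions; a short C sketch (illustrative, not part of the assembly) confirming that this form agrees bit-for-bit with the textbook majority function:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t
maj_ref (uint32_t a, uint32_t b, uint32_t c)
{
  return (a & b) ^ (a & c) ^ (b & c);
}

static uint32_t
maj_asm_form (uint32_t a, uint32_t b, uint32_t c)
{
  return ((a | c) & b) | (a & c);   /* form used by the round macros above */
}

int
main (void)
{
  uint32_t v[] = { 0x00000000u, 0xffffffffu, 0x12345678u, 0x9abcdef0u };
  unsigned int i, j, k;

  /* Per bit there are only 8 input cases; all of them occur here because
   * both the all-zero and the all-one word are included. */
  for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++)
      for (k = 0; k < 4; k++)
        assert (maj_ref (v[i], v[j], v[k]) == maj_asm_form (v[i], v[j], v[k]));

  puts ("MAJ forms agree");
  return 0;
}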
diff --git a/comm/third_party/libgcrypt/cipher/sha256.c b/comm/third_party/libgcrypt/cipher/sha256.c
new file mode 100644
index 0000000000..9350589110
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha256.c
@@ -0,0 +1,857 @@
+/* sha256.c - SHA256 hash function
+ * Copyright (C) 2003, 2006, 2008, 2009 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/* Test vectors:
+
+ "abc"
+ SHA224: 23097d22 3405d822 8642a477 bda255b3 2aadbce4 bda0b3f7 e36c9da7
+ SHA256: ba7816bf 8f01cfea 414140de 5dae2223 b00361a3 96177a9c b410ff61 f20015ad
+
+ "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+ SHA224: 75388b16 512776cc 5dba5da1 fd890150 b0c6455c b4f58b19 52522525
+ SHA256: 248d6a61 d20638b8 e5c02693 0c3e6039 a33ce459 64ff2167 f6ecedd4 19db06c1
+
+ "a" one million times
+ SHA224: 20794655 980c91d8 bbb4c1ea 97618a4b f03f4258 1948b2ee 4ee7ad67
+ SHA256: cdc76e5c 9914fb92 81a1c7e2 84d73e67 f1809a48 a497200e 046d39cc c7112cd0
+
+ */
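These vectors can be checked through the public API; a hedged usage sketch (assumes a normal libgcrypt installation; compile and link flags, for example those reported by libgcrypt-config --cflags --libs, depend on the system):

#include <stdio.h>
#include <gcrypt.h>

int
main (void)
{
  unsigned char digest[32];
  int i;

  /* One-shot hashing; should print the SHA256 "abc" value listed above. */
  gcry_md_hash_buffer (GCRY_MD_SHA256, digest, "abc", 3);

  for (i = 0; i < 32; i++)
    printf ("%02x", digest[i]);
  putchar ('\n');
  return 0;
}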
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+
+/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
+#undef USE_SSSE3
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSSE3 1
+#endif
+
+/* USE_AVX indicates whether to compile with Intel AVX code. */
+#undef USE_AVX
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX 1
+#endif
+
+/* USE_AVX2 indicates whether to compile with Intel AVX2/BMI2 code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+ defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX2 1
+#endif
+
+/* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */
+#undef USE_SHAEXT
+#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
+ defined(HAVE_GCC_INLINE_ASM_SSE41) && \
+ defined(ENABLE_SHAEXT_SUPPORT)
+# define USE_SHAEXT 1
+#endif
+
+/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
+ * code. */
+#undef USE_ARM_CE
+#ifdef ENABLE_ARM_CRYPTO_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+# define USE_ARM_CE 1
+# elif defined(__AARCH64EL__) \
+ && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+# define USE_ARM_CE 1
+# endif
+#endif
+
+/* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto
+ * accelerated code. */
+#undef USE_PPC_CRYPTO
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+# if __GNUC__ >= 4
+# define USE_PPC_CRYPTO 1
+# endif
+# endif
+#endif
+
+/* USE_S390X_CRYPTO indicates whether to enable zSeries code. */
+#undef USE_S390X_CRYPTO
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define USE_S390X_CRYPTO 1
+#endif /* USE_S390X_CRYPTO */
+
+
+typedef struct {
+ gcry_md_block_ctx_t bctx;
+ u32 h0,h1,h2,h3,h4,h5,h6,h7;
+#ifdef USE_S390X_CRYPTO
+ u32 final_len_msb, final_len_lsb; /* needs to be right after h7. */
+ int use_s390x_crypto;
+#endif
+} SHA256_CONTEXT;
+
+
+/* Assembly implementations use the SystemV ABI; ABI conversion and additional
+ * stack space to store XMM6-XMM15 are needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) || \
+ defined(USE_SHAEXT)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16 + sizeof(void *) * 4)
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
+#ifdef USE_SSSE3
+unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data,
+ u32 state[8],
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_ssse3(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_amd64_ssse3 (data, &hd->h0, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_AVX
+unsigned int _gcry_sha256_transform_amd64_avx(const void *input_data,
+ u32 state[8],
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_avx(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_amd64_avx (data, &hd->h0, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_AVX2
+unsigned int _gcry_sha256_transform_amd64_avx2(const void *input_data,
+ u32 state[8],
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_avx2(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_amd64_avx2 (data, &hd->h0, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_SHAEXT
+/* Does not need ASM_FUNC_ABI */
+unsigned int
+_gcry_sha256_transform_intel_shaext(u32 state[8],
+ const unsigned char *input_data,
+ size_t num_blks);
+
+static unsigned int
+do_sha256_transform_intel_shaext(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_intel_shaext (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef USE_ARM_CE
+unsigned int _gcry_sha256_transform_armv8_ce(u32 state[8],
+ const void *input_data,
+ size_t num_blks);
+
+static unsigned int
+do_sha256_transform_armv8_ce(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_armv8_ce (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef USE_PPC_CRYPTO
+unsigned int _gcry_sha256_transform_ppc8(u32 state[8],
+ const unsigned char *input_data,
+ size_t num_blks);
+
+unsigned int _gcry_sha256_transform_ppc9(u32 state[8],
+ const unsigned char *input_data,
+ size_t num_blks);
+
+static unsigned int
+do_sha256_transform_ppc8(void *ctx, const unsigned char *data, size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_ppc8 (&hd->h0, data, nblks);
+}
+
+static unsigned int
+do_sha256_transform_ppc9(void *ctx, const unsigned char *data, size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_ppc9 (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef USE_S390X_CRYPTO
+#include "asm-inline-s390x.h"
+
+static unsigned int
+do_sha256_transform_s390x (void *ctx, const unsigned char *data, size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+
+ kimd_execute (KMID_FUNCTION_SHA256, &hd->h0, data, nblks * 64);
+ return 0;
+}
+
+static unsigned int
+do_sha256_final_s390x (void *ctx, const unsigned char *data, size_t datalen,
+ u32 len_msb, u32 len_lsb)
+{
+ SHA256_CONTEXT *hd = ctx;
+
+  /* Make sure that 'final_len' is positioned at the correct offset relative
+   * to 'h0', because the 'h0' pointer is passed as the start of the
+   * parameter block to the 'klmd' instruction. */
+
+ gcry_assert (offsetof (SHA256_CONTEXT, final_len_msb)
+ - offsetof (SHA256_CONTEXT, h0) == 8 * sizeof(u32));
+ gcry_assert (offsetof (SHA256_CONTEXT, final_len_lsb)
+ - offsetof (SHA256_CONTEXT, final_len_msb) == 1 * sizeof(u32));
+
+ hd->final_len_msb = len_msb;
+ hd->final_len_lsb = len_lsb;
+
+ klmd_execute (KMID_FUNCTION_SHA256, &hd->h0, data, datalen);
+ return 0;
+}
+#endif
+
+
+static unsigned int
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks);
+
+
+static void
+sha256_common_init (SHA256_CONTEXT *hd)
+{
+ unsigned int features = _gcry_get_hw_features ();
+
+ hd->bctx.nblocks = 0;
+ hd->bctx.nblocks_high = 0;
+ hd->bctx.count = 0;
+ hd->bctx.blocksize_shift = _gcry_ctz(64);
+
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ hd->bctx.bwrite = do_transform_generic;
+#ifdef USE_SSSE3
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ hd->bctx.bwrite = do_sha256_transform_amd64_ssse3;
+#endif
+#ifdef USE_AVX
+ /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
+ * Therefore use this implementation on Intel CPUs only. */
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx;
+#endif
+#ifdef USE_AVX2
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx2;
+#endif
+#ifdef USE_SHAEXT
+ if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
+ hd->bctx.bwrite = do_sha256_transform_intel_shaext;
+#endif
+#ifdef USE_ARM_CE
+ if ((features & HWF_ARM_SHA2) != 0)
+ hd->bctx.bwrite = do_sha256_transform_armv8_ce;
+#endif
+#ifdef USE_PPC_CRYPTO
+ if ((features & HWF_PPC_VCRYPTO) != 0)
+ hd->bctx.bwrite = do_sha256_transform_ppc8;
+ if ((features & HWF_PPC_VCRYPTO) != 0 && (features & HWF_PPC_ARCH_3_00) != 0)
+ hd->bctx.bwrite = do_sha256_transform_ppc9;
+#endif
+#ifdef USE_S390X_CRYPTO
+ hd->use_s390x_crypto = 0;
+ if ((features & HWF_S390X_MSA) != 0)
+ {
+ if ((kimd_query () & km_function_to_mask (KMID_FUNCTION_SHA256)) &&
+ (klmd_query () & km_function_to_mask (KMID_FUNCTION_SHA256)))
+ {
+ hd->bctx.bwrite = do_sha256_transform_s390x;
+ hd->use_s390x_crypto = 1;
+ }
+ }
+#endif
+ (void)features;
+}
+
+
+static void
+sha256_init (void *context, unsigned int flags)
+{
+ SHA256_CONTEXT *hd = context;
+
+ (void)flags;
+
+ hd->h0 = 0x6a09e667;
+ hd->h1 = 0xbb67ae85;
+ hd->h2 = 0x3c6ef372;
+ hd->h3 = 0xa54ff53a;
+ hd->h4 = 0x510e527f;
+ hd->h5 = 0x9b05688c;
+ hd->h6 = 0x1f83d9ab;
+ hd->h7 = 0x5be0cd19;
+
+ sha256_common_init (hd);
+}
+
+
+static void
+sha224_init (void *context, unsigned int flags)
+{
+ SHA256_CONTEXT *hd = context;
+
+ (void)flags;
+
+ hd->h0 = 0xc1059ed8;
+ hd->h1 = 0x367cd507;
+ hd->h2 = 0x3070dd17;
+ hd->h3 = 0xf70e5939;
+ hd->h4 = 0xffc00b31;
+ hd->h5 = 0x68581511;
+ hd->h6 = 0x64f98fa7;
+ hd->h7 = 0xbefa4fa4;
+
+ sha256_common_init (hd);
+}
+
+
+/*
+  Transform the message X, which consists of sixteen 32-bit words.  See
+  FIPS 180-2 for details.  */
+#define R(a,b,c,d,e,f,g,h,k,w) do \
+ { \
+ t1 = (h) + Sum1((e)) + Cho((e),(f),(g)) + (k) + (w); \
+ t2 = Sum0((a)) + Maj((a),(b),(c)); \
+ d += t1; \
+ h = t1 + t2; \
+ } while (0)
+
+/* (4.2) same as SHA-1's F1. */
+#define Cho(x, y, z) (z ^ (x & (y ^ z)))
+
+/* (4.3) same as SHA-1's F3 */
+#define Maj(x, y, z) ((x & y) + (z & (x ^ y)))
+
+/* (4.4) */
+#define Sum0(x) (ror (x, 2) ^ ror (x, 13) ^ ror (x, 22))
+
+/* (4.5) */
+#define Sum1(x) (ror (x, 6) ^ ror (x, 11) ^ ror (x, 25))
+
+/* Message expansion */
+#define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3)) /* (4.6) */
+#define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10)) /* (4.7) */
+#define I(i) ( w[i] = buf_get_be32(data + i * 4) )
+#define W(i) ( w[i&0x0f] = S1(w[(i-2) &0x0f]) \
+ + w[(i-7) &0x0f] \
+ + S0(w[(i-15)&0x0f]) \
+ + w[(i-16)&0x0f] )
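+
+/* Note: the message schedule is kept in a rolling 16-word window (index
+ * "i & 0x0f"), so the full 64-entry schedule is never materialized and only
+ * 64 bytes of 'w' are live at any point of the generic transform below. */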
+
+static unsigned int
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ static const u32 K[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+ };
+
+ do
+ {
+
+ u32 a,b,c,d,e,f,g,h,t1,t2;
+ u32 w[16];
+
+ a = hd->h0;
+ b = hd->h1;
+ c = hd->h2;
+ d = hd->h3;
+ e = hd->h4;
+ f = hd->h5;
+ g = hd->h6;
+ h = hd->h7;
+
+ R(a, b, c, d, e, f, g, h, K[0], I(0));
+ R(h, a, b, c, d, e, f, g, K[1], I(1));
+ R(g, h, a, b, c, d, e, f, K[2], I(2));
+ R(f, g, h, a, b, c, d, e, K[3], I(3));
+ R(e, f, g, h, a, b, c, d, K[4], I(4));
+ R(d, e, f, g, h, a, b, c, K[5], I(5));
+ R(c, d, e, f, g, h, a, b, K[6], I(6));
+ R(b, c, d, e, f, g, h, a, K[7], I(7));
+ R(a, b, c, d, e, f, g, h, K[8], I(8));
+ R(h, a, b, c, d, e, f, g, K[9], I(9));
+ R(g, h, a, b, c, d, e, f, K[10], I(10));
+ R(f, g, h, a, b, c, d, e, K[11], I(11));
+ R(e, f, g, h, a, b, c, d, K[12], I(12));
+ R(d, e, f, g, h, a, b, c, K[13], I(13));
+ R(c, d, e, f, g, h, a, b, K[14], I(14));
+ R(b, c, d, e, f, g, h, a, K[15], I(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], W(48));
+ R(h, a, b, c, d, e, f, g, K[49], W(49));
+ R(g, h, a, b, c, d, e, f, K[50], W(50));
+ R(f, g, h, a, b, c, d, e, K[51], W(51));
+ R(e, f, g, h, a, b, c, d, K[52], W(52));
+ R(d, e, f, g, h, a, b, c, K[53], W(53));
+ R(c, d, e, f, g, h, a, b, K[54], W(54));
+ R(b, c, d, e, f, g, h, a, K[55], W(55));
+ R(a, b, c, d, e, f, g, h, K[56], W(56));
+ R(h, a, b, c, d, e, f, g, K[57], W(57));
+ R(g, h, a, b, c, d, e, f, K[58], W(58));
+ R(f, g, h, a, b, c, d, e, K[59], W(59));
+ R(e, f, g, h, a, b, c, d, K[60], W(60));
+ R(d, e, f, g, h, a, b, c, K[61], W(61));
+ R(c, d, e, f, g, h, a, b, K[62], W(62));
+ R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+ hd->h0 += a;
+ hd->h1 += b;
+ hd->h2 += c;
+ hd->h3 += d;
+ hd->h4 += e;
+ hd->h5 += f;
+ hd->h6 += g;
+ hd->h7 += h;
+
+ data += 64;
+ }
+ while (--nblks);
+
+ return 26*4 + 32 + 3 * sizeof(void*);
+}
+
+#undef S0
+#undef S1
+#undef R
+
+
+/*
+   The routine finally terminates the computation and returns the
+   digest.  The handle is prepared for a new cycle, but adding bytes
+   to the handle will destroy the returned buffer.
+   Returns: 32 bytes with the message digest.  */
+static void
+sha256_final(void *context)
+{
+ SHA256_CONTEXT *hd = context;
+ u32 t, th, msb, lsb;
+ byte *p;
+ unsigned int burn;
+
+ t = hd->bctx.nblocks;
+ if (sizeof t == sizeof hd->bctx.nblocks)
+ th = hd->bctx.nblocks_high;
+ else
+ th = hd->bctx.nblocks >> 32;
+
+ /* multiply by 64 to make a byte count */
+ lsb = t << 6;
+ msb = (th << 6) | (t >> 26);
+ /* add the count */
+ t = lsb;
+ if ((lsb += hd->bctx.count) < t)
+ msb++;
+ /* multiply by 8 to make a bit count */
+ t = lsb;
+ lsb <<= 3;
+ msb <<= 3;
+ msb |= t >> 29;
+
+ if (0)
+ { }
+#ifdef USE_S390X_CRYPTO
+ else if (hd->use_s390x_crypto)
+ {
+ burn = do_sha256_final_s390x (hd, hd->bctx.buf, hd->bctx.count, msb, lsb);
+ }
+#endif
+ else if (hd->bctx.count < 56) /* enough room */
+ {
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
+ if (hd->bctx.count < 56)
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+ /* append the 64 bit count */
+ buf_put_be32(hd->bctx.buf + 56, msb);
+ buf_put_be32(hd->bctx.buf + 60, lsb);
+ burn = (*hd->bctx.bwrite) (hd, hd->bctx.buf, 1);
+ }
+ else /* need one extra block */
+ {
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
+ /* fill pad and next block with zeroes */
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+ /* append the 64 bit count */
+ buf_put_be32(hd->bctx.buf + 64 + 56, msb);
+ buf_put_be32(hd->bctx.buf + 64 + 60, lsb);
+ burn = (*hd->bctx.bwrite) (hd, hd->bctx.buf, 2);
+ }
+
+ p = hd->bctx.buf;
+#define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0)
+ X(0);
+ X(1);
+ X(2);
+ X(3);
+ X(4);
+ X(5);
+ X(6);
+ X(7);
+#undef X
+
+ hd->bctx.count = 0;
+
+ _gcry_burn_stack (burn);
+}
+
+static byte *
+sha256_read (void *context)
+{
+ SHA256_CONTEXT *hd = context;
+
+ return hd->bctx.buf;
+}
+
+
+/* Shortcut function which puts the hash value of the supplied buffer
+ * into outbuf, which must have a size of 32 bytes. */
+void
+_gcry_sha256_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+ SHA256_CONTEXT hd;
+
+ sha256_init (&hd, 0);
+ _gcry_md_block_write (&hd, buffer, length);
+ sha256_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 32);
+}
+
+
+/* Variant of the above shortcut function using multiple buffers. */
+void
+_gcry_sha256_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
+{
+ SHA256_CONTEXT hd;
+
+ sha256_init (&hd, 0);
+ for (;iovcnt > 0; iov++, iovcnt--)
+ _gcry_md_block_write (&hd,
+ (const char*)iov[0].data + iov[0].off, iov[0].len);
+ sha256_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 32);
+}
+
+
+/* Shortcut function which puts the hash value of the supplied buffer
+ * into outbuf, which must have a size of 28 bytes. */
+static void
+_gcry_sha224_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+ SHA256_CONTEXT hd;
+
+ sha224_init (&hd, 0);
+ _gcry_md_block_write (&hd, buffer, length);
+ sha256_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 28);
+}
+
+
+/* Variant of the above shortcut function using multiple buffers. */
+static void
+_gcry_sha224_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
+{
+ SHA256_CONTEXT hd;
+
+ sha224_init (&hd, 0);
+ for (;iovcnt > 0; iov++, iovcnt--)
+ _gcry_md_block_write (&hd,
+ (const char*)iov[0].data + iov[0].off, iov[0].len);
+ sha256_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 28);
+}
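+
+/* For illustration only (not part of this file): applications normally reach
+ * these shortcut functions through the public libgcrypt API instead of
+ * calling them directly.  A minimal caller-side sketch, assuming libgcrypt
+ * has already been initialized:
+ *
+ *   #include <gcrypt.h>
+ *
+ *   unsigned char digest[32];
+ *   gcry_md_hash_buffer (GCRY_MD_SHA256, digest, "abc", 3);
+ *
+ * Such calls are expected to end up in the SHA-256 code of this file via the
+ * digest specs registered at the end of the file. */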
+
+
+
+/*
+ Self-test section.
+ */
+
+
+static gpg_err_code_t
+selftests_sha224 (int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+
+ what = "short string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA224, 0,
+ "abc", 3,
+ "\x23\x09\x7d\x22\x34\x05\xd8\x22\x86\x42\xa4\x77\xbd\xa2\x55\xb3"
+ "\x2a\xad\xbc\xe4\xbd\xa0\xb3\xf7\xe3\x6c\x9d\xa7", 28);
+ if (errtxt)
+ goto failed;
+
+ if (extended)
+ {
+ what = "long string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA224, 0,
+ "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
+ "\x75\x38\x8b\x16\x51\x27\x76\xcc\x5d\xba\x5d\xa1\xfd\x89\x01\x50"
+ "\xb0\xc6\x45\x5c\xb4\xf5\x8b\x19\x52\x52\x25\x25", 28);
+ if (errtxt)
+ goto failed;
+
+ what = "one million \"a\"";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA224, 1,
+ NULL, 0,
+ "\x20\x79\x46\x55\x98\x0c\x91\xd8\xbb\xb4\xc1\xea\x97\x61\x8a\x4b"
+ "\xf0\x3f\x42\x58\x19\x48\xb2\xee\x4e\xe7\xad\x67", 28);
+ if (errtxt)
+ goto failed;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("digest", GCRY_MD_SHA224, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+static gpg_err_code_t
+selftests_sha256 (int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+
+ what = "short string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA256, 0,
+ "abc", 3,
+ "\xba\x78\x16\xbf\x8f\x01\xcf\xea\x41\x41\x40\xde\x5d\xae\x22\x23"
+ "\xb0\x03\x61\xa3\x96\x17\x7a\x9c\xb4\x10\xff\x61\xf2\x00\x15\xad", 32);
+ if (errtxt)
+ goto failed;
+
+ if (extended)
+ {
+ what = "long string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA256, 0,
+ "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
+ "\x24\x8d\x6a\x61\xd2\x06\x38\xb8\xe5\xc0\x26\x93\x0c\x3e\x60\x39"
+ "\xa3\x3c\xe4\x59\x64\xff\x21\x67\xf6\xec\xed\xd4\x19\xdb\x06\xc1",
+ 32);
+ if (errtxt)
+ goto failed;
+
+ what = "one million \"a\"";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA256, 1,
+ NULL, 0,
+ "\xcd\xc7\x6e\x5c\x99\x14\xfb\x92\x81\xa1\xc7\xe2\x84\xd7\x3e\x67"
+ "\xf1\x80\x9a\x48\xa4\x97\x20\x0e\x04\x6d\x39\xcc\xc7\x11\x2c\xd0",
+ 32);
+ if (errtxt)
+ goto failed;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("digest", GCRY_MD_SHA256, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Run a full self-test for ALGO and return 0 on success. */
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+ gpg_err_code_t ec;
+
+ switch (algo)
+ {
+ case GCRY_MD_SHA224:
+ ec = selftests_sha224 (extended, report);
+ break;
+ case GCRY_MD_SHA256:
+ ec = selftests_sha256 (extended, report);
+ break;
+ default:
+ ec = GPG_ERR_DIGEST_ALGO;
+ break;
+
+ }
+ return ec;
+}
+
+
+
+
+static byte asn224[19] = /* Object ID is 2.16.840.1.101.3.4.2.4 */
+ { 0x30, 0x2D, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, 0x48,
+ 0x01, 0x65, 0x03, 0x04, 0x02, 0x04, 0x05, 0x00, 0x04,
+ 0x1C
+ };
+
+static gcry_md_oid_spec_t oid_spec_sha224[] =
+ {
+ /* From RFC3874, Section 4 */
+ { "2.16.840.1.101.3.4.2.4" },
+ { NULL },
+ };
+
+static byte asn256[19] = /* Object ID is 2.16.840.1.101.3.4.2.1 */
+ { 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
+ 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, 0x05,
+ 0x00, 0x04, 0x20 };
+
+static gcry_md_oid_spec_t oid_spec_sha256[] =
+ {
+ /* According to the OpenPGP draft rfc2440-bis06 */
+ { "2.16.840.1.101.3.4.2.1" },
+ /* PKCS#1 sha256WithRSAEncryption */
+ { "1.2.840.113549.1.1.11" },
+
+ { NULL },
+ };
+
+gcry_md_spec_t _gcry_digest_spec_sha224 =
+ {
+ GCRY_MD_SHA224, {0, 1},
+ "SHA224", asn224, DIM (asn224), oid_spec_sha224, 28,
+ sha224_init, _gcry_md_block_write, sha256_final, sha256_read, NULL,
+ _gcry_sha224_hash_buffer, _gcry_sha224_hash_buffers,
+ sizeof (SHA256_CONTEXT),
+ run_selftests
+ };
+
+gcry_md_spec_t _gcry_digest_spec_sha256 =
+ {
+ GCRY_MD_SHA256, {0, 1},
+ "SHA256", asn256, DIM (asn256), oid_spec_sha256, 32,
+ sha256_init, _gcry_md_block_write, sha256_final, sha256_read, NULL,
+ _gcry_sha256_hash_buffer, _gcry_sha256_hash_buffers,
+ sizeof (SHA256_CONTEXT),
+ run_selftests
+ };
diff --git a/comm/third_party/libgcrypt/cipher/sha512-arm.S b/comm/third_party/libgcrypt/cipher/sha512-arm.S
new file mode 100644
index 0000000000..94ec0141e7
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha512-arm.S
@@ -0,0 +1,464 @@
+/* sha512-arm.S - ARM assembly implementation of SHA-512 transform
+ *
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+/* structure of SHA512_CONTEXT */
+#define hd_a 0
+#define hd_b ((hd_a) + 8)
+#define hd_c ((hd_b) + 8)
+#define hd_d ((hd_c) + 8)
+#define hd_e ((hd_d) + 8)
+#define hd_f ((hd_e) + 8)
+#define hd_g ((hd_f) + 8)
+#define hd_h ((hd_g) + 8)
+
+/* register macros */
+#define RK %r2
+
+#define RElo %r0
+#define REhi %r1
+
+#define RT1lo %r3
+#define RT1hi %r4
+#define RT2lo %r5
+#define RT2hi %r6
+#define RWlo %r7
+#define RWhi %r8
+#define RT3lo %r9
+#define RT3hi %r10
+#define RT4lo %r11
+#define RT4hi %ip
+
+#define RRND %lr
+
+/* variable offsets in stack */
+#define ctx (0)
+#define data ((ctx) + 4)
+#define nblks ((data) + 4)
+#define _a ((nblks) + 4)
+#define _b ((_a) + 8)
+#define _c ((_b) + 8)
+#define _d ((_c) + 8)
+#define _e ((_d) + 8)
+#define _f ((_e) + 8)
+#define _g ((_f) + 8)
+#define _h ((_g) + 8)
+
+#define w(i) ((_h) + 8 + ((i) % 16) * 8)
+
+#define STACK_MAX (w(15) + 8)
+
+/* helper macros */
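+/* ldr_unaligned_be loads a big-endian 32-bit word one byte at a time, so the
+ * source pointer needs no particular alignment. */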
+#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \
+ ldrb rout, [rsrc, #((offs) + 3)]; \
+ ldrb rtmp, [rsrc, #((offs) + 2)]; \
+ orr rout, rout, rtmp, lsl #8; \
+ ldrb rtmp, [rsrc, #((offs) + 1)]; \
+ orr rout, rout, rtmp, lsl #16; \
+ ldrb rtmp, [rsrc, #((offs) + 0)]; \
+ orr rout, rout, rtmp, lsl #24;
+
+#ifdef __ARMEL__
+ /* bswap on little-endian */
+#ifdef HAVE_ARM_ARCH_V6
+ #define be_to_host(reg, rtmp) \
+ rev reg, reg;
+#else
+ #define be_to_host(reg, rtmp) \
+ eor rtmp, reg, reg, ror #16; \
+ mov rtmp, rtmp, lsr #8; \
+ bic rtmp, rtmp, #65280; \
+ eor reg, rtmp, reg, ror #8;
+#endif
+#else
+ /* nop on big-endian */
+ #define be_to_host(reg, rtmp) /*_*/
+#endif
+
+#define host_to_host(x, y) /*_*/
+
+#define read_u64_aligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, convert, rtmp) \
+ ldr lo0, [rin, #((offs) + 0 * 8 + 4)]; \
+ ldr hi0, [rin, #((offs) + 0 * 8 + 0)]; \
+ ldr lo1, [rin, #((offs) + 1 * 8 + 4)]; \
+ ldr hi1, [rin, #((offs) + 1 * 8 + 0)]; \
+ ldr lo2, [rin, #((offs) + 2 * 8 + 4)]; \
+ convert(lo0, rtmp); \
+ ldr hi2, [rin, #((offs) + 2 * 8 + 0)]; \
+ convert(hi0, rtmp); \
+ ldr lo3, [rin, #((offs) + 3 * 8 + 4)]; \
+ convert(lo1, rtmp); \
+ ldr hi3, [rin, #((offs) + 3 * 8 + 0)]; \
+ convert(hi1, rtmp); \
+ convert(lo2, rtmp); \
+ convert(hi2, rtmp); \
+ convert(lo3, rtmp); \
+ convert(hi3, rtmp);
+
+#define read_be64_aligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, rtmp0) \
+ read_u64_aligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, be_to_host, rtmp0)
+
+/* need to handle unaligned reads by byte reads */
+#define read_be64_unaligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, rtmp0) \
+ ldr_unaligned_be(lo0, rin, (offs) + 0 * 8 + 4, rtmp0); \
+ ldr_unaligned_be(hi0, rin, (offs) + 0 * 8 + 0, rtmp0); \
+ ldr_unaligned_be(lo1, rin, (offs) + 1 * 8 + 4, rtmp0); \
+ ldr_unaligned_be(hi1, rin, (offs) + 1 * 8 + 0, rtmp0); \
+ ldr_unaligned_be(lo2, rin, (offs) + 2 * 8 + 4, rtmp0); \
+ ldr_unaligned_be(hi2, rin, (offs) + 2 * 8 + 0, rtmp0); \
+ ldr_unaligned_be(lo3, rin, (offs) + 3 * 8 + 4, rtmp0); \
+ ldr_unaligned_be(hi3, rin, (offs) + 3 * 8 + 0, rtmp0);
+
+/***********************************************************************
+ * ARM assembly implementation of sha512 transform
+ ***********************************************************************/
+
+/* Round function */
+
+#define R(_a,_b,_c,_d,_e,_f,_g,_h,W,wi) \
+ /* Message expansion, t1 = _h + w[i] */ \
+ W(_a,_h,wi); \
+ \
+ /* w = Sum1(_e) */ \
+ mov RWlo, RElo, lsr#14; \
+ ldm RK!, {RT2lo-RT2hi}; \
+ mov RWhi, REhi, lsr#14; \
+ eor RWlo, RWlo, RElo, lsr#18; \
+ eor RWhi, RWhi, REhi, lsr#18; \
+ ldr RT3lo, [%sp, #(_f)]; \
+ adds RT1lo, RT2lo; /* t1 += K */ \
+ ldr RT3hi, [%sp, #(_f) + 4]; \
+ adc RT1hi, RT2hi; \
+ ldr RT4lo, [%sp, #(_g)]; \
+ eor RWlo, RWlo, RElo, lsl#23; \
+ ldr RT4hi, [%sp, #(_g) + 4]; \
+ eor RWhi, RWhi, REhi, lsl#23; \
+ eor RWlo, RWlo, REhi, lsl#18; \
+ eor RWhi, RWhi, RElo, lsl#18; \
+ eor RWlo, RWlo, REhi, lsl#14; \
+ eor RWhi, RWhi, RElo, lsl#14; \
+ eor RWlo, RWlo, REhi, lsr#9; \
+ eor RWhi, RWhi, RElo, lsr#9; \
+ \
+ /* Cho(_e,_f,_g) => (_e & _f) ^ (~_e & _g) */ \
+ adds RT1lo, RWlo; /* t1 += Sum1(_e) */ \
+ and RT3lo, RT3lo, RElo; \
+ adc RT1hi, RWhi; \
+ and RT3hi, RT3hi, REhi; \
+ bic RT4lo, RT4lo, RElo; \
+ bic RT4hi, RT4hi, REhi; \
+ eor RT3lo, RT3lo, RT4lo; \
+ eor RT3hi, RT3hi, RT4hi; \
+ \
+ /* Load D */ \
+ /* t1 += Cho(_e,_f,_g) */ \
+ ldr RElo, [%sp, #(_d)]; \
+ adds RT1lo, RT3lo; \
+ ldr REhi, [%sp, #(_d) + 4]; \
+ adc RT1hi, RT3hi; \
+ \
+ /* Load A */ \
+ ldr RT3lo, [%sp, #(_a)]; \
+ \
+ /* _d += t1 */ \
+ adds RElo, RT1lo; \
+ ldr RT3hi, [%sp, #(_a) + 4]; \
+ adc REhi, RT1hi; \
+ \
+ /* Store D */ \
+ str RElo, [%sp, #(_d)]; \
+ \
+ /* t2 = Sum0(_a) */ \
+ mov RT2lo, RT3lo, lsr#28; \
+ str REhi, [%sp, #(_d) + 4]; \
+ mov RT2hi, RT3hi, lsr#28; \
+ ldr RWlo, [%sp, #(_b)]; \
+ eor RT2lo, RT2lo, RT3lo, lsl#30; \
+ ldr RWhi, [%sp, #(_b) + 4]; \
+ eor RT2hi, RT2hi, RT3hi, lsl#30; \
+ eor RT2lo, RT2lo, RT3lo, lsl#25; \
+ eor RT2hi, RT2hi, RT3hi, lsl#25; \
+ eor RT2lo, RT2lo, RT3hi, lsl#4; \
+ eor RT2hi, RT2hi, RT3lo, lsl#4; \
+ eor RT2lo, RT2lo, RT3hi, lsr#2; \
+ eor RT2hi, RT2hi, RT3lo, lsr#2; \
+ eor RT2lo, RT2lo, RT3hi, lsr#7; \
+ eor RT2hi, RT2hi, RT3lo, lsr#7; \
+ \
+ /* t2 += t1 */ \
+ adds RT2lo, RT1lo; \
+ ldr RT1lo, [%sp, #(_c)]; \
+ adc RT2hi, RT1hi; \
+ \
+ /* Maj(_a,_b,_c) => ((_a & _b) ^ (_c & (_a ^ _b))) */ \
+ ldr RT1hi, [%sp, #(_c) + 4]; \
+ and RT4lo, RWlo, RT3lo; \
+ and RT4hi, RWhi, RT3hi; \
+ eor RWlo, RWlo, RT3lo; \
+ eor RWhi, RWhi, RT3hi; \
+ and RWlo, RWlo, RT1lo; \
+ and RWhi, RWhi, RT1hi; \
+ eor RWlo, RWlo, RT4lo; \
+ eor RWhi, RWhi, RT4hi; \
+
+/* Message expansion */
+
+#define W_0_63(_a,_h,i) \
+ ldr RT3lo, [%sp, #(w(i-2))]; \
+ adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \
+ ldr RT3hi, [%sp, #(w(i-2)) + 4]; \
+ adc RT2hi, RWhi; \
+ /* nw = S1(w[i-2]) */ \
+ ldr RT1lo, [%sp, #(_h)]; /* Load H */ \
+ mov RWlo, RT3lo, lsr#19; \
+ str RT2lo, [%sp, #(_a)]; \
+ eor RWlo, RWlo, RT3lo, lsl#3; \
+ ldr RT1hi, [%sp, #(_h) + 4]; \
+ mov RWhi, RT3hi, lsr#19; \
+ ldr RT2lo, [%sp, #(w(i-7))]; \
+ eor RWhi, RWhi, RT3hi, lsl#3; \
+ str RT2hi, [%sp, #(_a) + 4]; \
+ eor RWlo, RWlo, RT3lo, lsr#6; \
+ ldr RT2hi, [%sp, #(w(i-7)) + 4]; \
+ eor RWhi, RWhi, RT3hi, lsr#6; \
+ eor RWlo, RWlo, RT3hi, lsl#13; \
+ eor RWhi, RWhi, RT3lo, lsl#13; \
+ eor RWlo, RWlo, RT3hi, lsr#29; \
+ eor RWhi, RWhi, RT3lo, lsr#29; \
+ ldr RT3lo, [%sp, #(w(i-15))]; \
+ eor RWlo, RWlo, RT3hi, lsl#26; \
+ ldr RT3hi, [%sp, #(w(i-15)) + 4]; \
+ \
+ adds RT2lo, RWlo; /* nw += w[i-7] */ \
+ ldr RWlo, [%sp, #(w(i-16))]; \
+ adc RT2hi, RWhi; \
+ mov RT4lo, RT3lo, lsr#1; /* S0(w[i-15]) */ \
+ ldr RWhi, [%sp, #(w(i-16)) + 4]; \
+ mov RT4hi, RT3hi, lsr#1; \
+ adds RT2lo, RWlo; /* nw += w[i-16] */ \
+ eor RT4lo, RT4lo, RT3lo, lsr#8; \
+ eor RT4hi, RT4hi, RT3hi, lsr#8; \
+ eor RT4lo, RT4lo, RT3lo, lsr#7; \
+ eor RT4hi, RT4hi, RT3hi, lsr#7; \
+ eor RT4lo, RT4lo, RT3hi, lsl#31; \
+ eor RT4hi, RT4hi, RT3lo, lsl#31; \
+ eor RT4lo, RT4lo, RT3hi, lsl#24; \
+ eor RT4hi, RT4hi, RT3lo, lsl#24; \
+ eor RT4lo, RT4lo, RT3hi, lsl#25; \
+ adc RT2hi, RWhi; \
+ \
+ /* nw += S0(w[i-15]) */ \
+ adds RT2lo, RT4lo; \
+ adc RT2hi, RT4hi; \
+ \
+ /* w[0] = nw */ \
+ str RT2lo, [%sp, #(w(i))]; \
+ adds RT1lo, RWlo; \
+ str RT2hi, [%sp, #(w(i)) + 4]; \
+ adc RT1hi, RWhi;
+
+#define W_64_79(_a,_h,i) \
+ adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \
+ ldr RWlo, [%sp, #(w(i-16))]; \
+ adc RT2hi, RWhi; \
+ ldr RWhi, [%sp, #(w(i-16)) + 4]; \
+ ldr RT1lo, [%sp, #(_h)]; /* Load H */ \
+ ldr RT1hi, [%sp, #(_h) + 4]; \
+ str RT2lo, [%sp, #(_a)]; \
+ str RT2hi, [%sp, #(_a) + 4]; \
+ adds RT1lo, RWlo; \
+ adc RT1hi, RWhi;
+
+.align 3
+.globl _gcry_sha512_transform_arm
+.type _gcry_sha512_transform_arm,%function;
+
+_gcry_sha512_transform_arm:
+ /* Input:
+ * %r0: SHA512_CONTEXT
+ * %r1: data
+ * %r2: u64 k[] constants
+ * %r3: nblks
+ */
+ push {%r4-%r11, %ip, %lr};
+ sub %sp, %sp, #STACK_MAX;
+ movs RWlo, %r3;
+ str %r0, [%sp, #(ctx)];
+
+ beq .Ldone;
+
+.Loop_blocks:
+ str RWlo, [%sp, #nblks];
+
+ /* Load context to stack */
+ add RWhi, %sp, #(_a);
+ ldm %r0!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+ stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+ ldm %r0, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+ stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+ /* Load input to w[16] */
+
+ /* test if data is unaligned */
+ tst %r1, #3;
+ beq 1f;
+
+ /* unaligned load */
+ add RWhi, %sp, #(w(0));
+ read_be64_unaligned_4(%r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+ read_be64_unaligned_4(%r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+ read_be64_unaligned_4(%r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+ read_be64_unaligned_4(%r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ b 2f;
+1:
+ /* aligned load */
+ add RWhi, %sp, #(w(0));
+ read_be64_aligned_4(%r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+ read_be64_aligned_4(%r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+ read_be64_aligned_4(%r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+ read_be64_aligned_4(%r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+2:
+ add %r1, #(16 * 8);
+ stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+ str %r1, [%sp, #(data)];
+
+ /* preload E & A */
+ ldr RElo, [%sp, #(_e)];
+ ldr REhi, [%sp, #(_e) + 4];
+ mov RWlo, #0;
+ ldr RT2lo, [%sp, #(_a)];
+ mov RRND, #(80-16);
+ ldr RT2hi, [%sp, #(_a) + 4];
+ mov RWhi, #0;
+
+.Loop_rounds:
+ R(_a, _b, _c, _d, _e, _f, _g, _h, W_0_63, 16);
+ R(_h, _a, _b, _c, _d, _e, _f, _g, W_0_63, 17);
+ R(_g, _h, _a, _b, _c, _d, _e, _f, W_0_63, 18);
+ R(_f, _g, _h, _a, _b, _c, _d, _e, W_0_63, 19);
+ R(_e, _f, _g, _h, _a, _b, _c, _d, W_0_63, 20);
+ R(_d, _e, _f, _g, _h, _a, _b, _c, W_0_63, 21);
+ R(_c, _d, _e, _f, _g, _h, _a, _b, W_0_63, 22);
+ R(_b, _c, _d, _e, _f, _g, _h, _a, W_0_63, 23);
+ R(_a, _b, _c, _d, _e, _f, _g, _h, W_0_63, 24);
+ R(_h, _a, _b, _c, _d, _e, _f, _g, W_0_63, 25);
+ R(_g, _h, _a, _b, _c, _d, _e, _f, W_0_63, 26);
+ R(_f, _g, _h, _a, _b, _c, _d, _e, W_0_63, 27);
+ R(_e, _f, _g, _h, _a, _b, _c, _d, W_0_63, 28);
+ R(_d, _e, _f, _g, _h, _a, _b, _c, W_0_63, 29);
+ R(_c, _d, _e, _f, _g, _h, _a, _b, W_0_63, 30);
+ R(_b, _c, _d, _e, _f, _g, _h, _a, W_0_63, 31);
+
+ subs RRND, #16;
+ bne .Loop_rounds;
+
+ R(_a, _b, _c, _d, _e, _f, _g, _h, W_64_79, 16);
+ R(_h, _a, _b, _c, _d, _e, _f, _g, W_64_79, 17);
+ R(_g, _h, _a, _b, _c, _d, _e, _f, W_64_79, 18);
+ R(_f, _g, _h, _a, _b, _c, _d, _e, W_64_79, 19);
+ R(_e, _f, _g, _h, _a, _b, _c, _d, W_64_79, 20);
+ R(_d, _e, _f, _g, _h, _a, _b, _c, W_64_79, 21);
+ R(_c, _d, _e, _f, _g, _h, _a, _b, W_64_79, 22);
+ R(_b, _c, _d, _e, _f, _g, _h, _a, W_64_79, 23);
+ R(_a, _b, _c, _d, _e, _f, _g, _h, W_64_79, 24);
+ R(_h, _a, _b, _c, _d, _e, _f, _g, W_64_79, 25);
+ R(_g, _h, _a, _b, _c, _d, _e, _f, W_64_79, 26);
+ R(_f, _g, _h, _a, _b, _c, _d, _e, W_64_79, 27);
+ R(_e, _f, _g, _h, _a, _b, _c, _d, W_64_79, 28);
+ R(_d, _e, _f, _g, _h, _a, _b, _c, W_64_79, 29);
+ R(_c, _d, _e, _f, _g, _h, _a, _b, W_64_79, 30);
+ R(_b, _c, _d, _e, _f, _g, _h, _a, W_64_79, 31);
+
+ ldr %r0, [%sp, #(ctx)];
+ adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */
+ ldr %r1, [%sp, #(data)];
+ adc RT2hi, RWhi;
+
+ ldm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
+ adds RT1lo, RT2lo;
+ ldr RT2lo, [%sp, #(_b + 0)];
+ adc RT1hi, RT2hi;
+ ldr RT2hi, [%sp, #(_b + 4)];
+ adds RWlo, RT2lo;
+ ldr RT2lo, [%sp, #(_c + 0)];
+ adc RWhi, RT2hi;
+ ldr RT2hi, [%sp, #(_c + 4)];
+ adds RT3lo, RT2lo;
+ ldr RT2lo, [%sp, #(_d + 0)];
+ adc RT3hi, RT2hi;
+ ldr RT2hi, [%sp, #(_d + 4)];
+ adds RT4lo, RT2lo;
+ ldr RT2lo, [%sp, #(_e + 0)];
+ adc RT4hi, RT2hi;
+ stm %r0!, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+ ldr RT2hi, [%sp, #(_e + 4)];
+ ldm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
+ adds RT1lo, RT2lo;
+ ldr RT2lo, [%sp, #(_f + 0)];
+ adc RT1hi, RT2hi;
+ ldr RT2hi, [%sp, #(_f + 4)];
+ adds RWlo, RT2lo;
+ ldr RT2lo, [%sp, #(_g + 0)];
+ adc RWhi, RT2hi;
+ ldr RT2hi, [%sp, #(_g + 4)];
+ adds RT3lo, RT2lo;
+ ldr RT2lo, [%sp, #(_h + 0)];
+ adc RT3hi, RT2hi;
+ ldr RT2hi, [%sp, #(_h + 4)];
+ adds RT4lo, RT2lo;
+ adc RT4hi, RT2hi;
+ stm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
+ sub %r0, %r0, #(4 * 8);
+ ldr RWlo, [%sp, #nblks];
+
+ sub RK, #(80 * 8);
+ subs RWlo, #1;
+ bne .Loop_blocks;
+
+.Ldone:
+ mov %r0, #STACK_MAX;
+__out:
+ add %sp, %sp, #STACK_MAX;
+ pop {%r4-%r11, %ip, %pc};
+.size _gcry_sha512_transform_arm,.-_gcry_sha512_transform_arm;
+
+#endif
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/sha512-armv7-neon.S b/comm/third_party/libgcrypt/cipher/sha512-armv7-neon.S
new file mode 100644
index 0000000000..6596f2cdb2
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha512-armv7-neon.S
@@ -0,0 +1,450 @@
+/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_NEON)
+
+.text
+
+.syntax unified
+.fpu neon
+.arm
+
+/* structure of SHA512_CONTEXT */
+#define hd_a 0
+#define hd_b ((hd_a) + 8)
+#define hd_c ((hd_b) + 8)
+#define hd_d ((hd_c) + 8)
+#define hd_e ((hd_d) + 8)
+#define hd_f ((hd_e) + 8)
+#define hd_g ((hd_f) + 8)
+
+/* register macros */
+#define RK %r2
+
+#define RA d0
+#define RB d1
+#define RC d2
+#define RD d3
+#define RE d4
+#define RF d5
+#define RG d6
+#define RH d7
+
+#define RT0 d8
+#define RT1 d9
+#define RT2 d10
+#define RT3 d11
+#define RT4 d12
+#define RT5 d13
+#define RT6 d14
+#define RT7 d15
+
+#define RT01q q4
+#define RT23q q5
+#define RT45q q6
+#define RT67q q7
+
+#define RW0 d16
+#define RW1 d17
+#define RW2 d18
+#define RW3 d19
+#define RW4 d20
+#define RW5 d21
+#define RW6 d22
+#define RW7 d23
+#define RW8 d24
+#define RW9 d25
+#define RW10 d26
+#define RW11 d27
+#define RW12 d28
+#define RW13 d29
+#define RW14 d30
+#define RW15 d31
+
+#define RW01q q8
+#define RW23q q9
+#define RW45q q10
+#define RW67q q11
+#define RW89q q12
+#define RW1011q q13
+#define RW1213q q14
+#define RW1415q q15
+
+/***********************************************************************
+ * ARM assembly implementation of sha512 transform
+ ***********************************************************************/
+#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
+ /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
+ vshr.u64 RT2, re, #14; \
+ vshl.u64 RT3, re, #64 - 14; \
+ interleave_op(arg1); \
+ vshr.u64 RT4, re, #18; \
+ vshl.u64 RT5, re, #64 - 18; \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, re, #41; \
+ vshl.u64 RT5, re, #64 - 41; \
+ vadd.u64 RT0, RT0, rw0; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, re; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, rf, rg; \
+ \
+ vadd.u64 RT1, RT1, rh; \
+ vshr.u64 RT2, ra, #28; \
+ vshl.u64 RT3, ra, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, ra, #34; \
+ vshl.u64 RT5, ra, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* h = Sum0 (a) + Maj (a, b, c); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, ra, #39; \
+ vshl.u64 RT5, ra, #64 - 39; \
+ veor.64 RT0, ra, rb; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rc, rb; \
+ vadd.u64 rd, rd, RT1; /* d+=t1; */ \
+ veor.64 rh, RT2, RT3; \
+ \
+ /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
+ vshr.u64 RT2, rd, #14; \
+ vshl.u64 RT3, rd, #64 - 14; \
+ vadd.u64 rh, rh, RT0; \
+ vshr.u64 RT4, rd, #18; \
+ vshl.u64 RT5, rd, #64 - 18; \
+ vadd.u64 rh, rh, RT1; /* h+=t1; */ \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rd, #41; \
+ vshl.u64 RT5, rd, #64 - 41; \
+ vadd.u64 RT0, RT0, rw1; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, rd; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, re, rf; \
+ \
+ vadd.u64 RT1, RT1, rg; \
+ vshr.u64 RT2, rh, #28; \
+ vshl.u64 RT3, rh, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, rh, #34; \
+ vshl.u64 RT5, rh, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* g = Sum0 (h) + Maj (h, a, b); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rh, #39; \
+ vshl.u64 RT5, rh, #64 - 39; \
+ veor.64 RT0, rh, ra; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rb, ra; \
+ vadd.u64 rc, rc, RT1; /* c+=t1; */ \
+ veor.64 rg, RT2, RT3; \
+ \
+ /* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
+ /* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
+ \
+ /**** S0(w[1:2]) */ \
+ \
+ /* w[0:1] += w[9:10] */ \
+ /* RT23q = rw1:rw2 */ \
+ vext.u64 RT23q, rw01q, rw23q, #1; \
+ vadd.u64 rw0, rw9; \
+ vadd.u64 rg, rg, RT0; \
+ vadd.u64 rw1, rw10;\
+ vadd.u64 rg, rg, RT1; /* g+=t1; */ \
+ \
+ vshr.u64 RT45q, RT23q, #1; \
+ vshl.u64 RT67q, RT23q, #64 - 1; \
+ vshr.u64 RT01q, RT23q, #8; \
+ veor.u64 RT45q, RT45q, RT67q; \
+ vshl.u64 RT67q, RT23q, #64 - 8; \
+ veor.u64 RT45q, RT45q, RT01q; \
+ vshr.u64 RT01q, RT23q, #7; \
+ veor.u64 RT45q, RT45q, RT67q; \
+ \
+ /**** S1(w[14:15]) */ \
+ vshr.u64 RT23q, rw1415q, #6; \
+ veor.u64 RT01q, RT01q, RT45q; \
+ vshr.u64 RT45q, rw1415q, #19; \
+ vshl.u64 RT67q, rw1415q, #64 - 19; \
+ veor.u64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT45q, rw1415q, #61; \
+ veor.u64 RT23q, RT23q, RT67q; \
+ vshl.u64 RT67q, rw1415q, #64 - 61; \
+ veor.u64 RT23q, RT23q, RT45q; \
+ vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
+ veor.u64 RT01q, RT23q, RT67q;
+#define vadd_RT01q(rw01q) \
+ /* w[0:1] += S(w[14:15]) */ \
+ vadd.u64 rw01q, RT01q;
+
+#define dummy(_) /*_*/
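+
+/* The interleave_op/arg parameters let a caller fold a pending operation into
+ * the start of the next macro expansion, typically the deferred
+ * "w[0:1] += S1(w[14:15])" update from the previous rounds2_0_63 invocation
+ * (see vadd_RT01q above); "dummy" is passed when nothing needs interleaving. */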
+
+#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, interleave_op1, arg1, interleave_op2, arg2) \
+ /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
+ vshr.u64 RT2, re, #14; \
+ vshl.u64 RT3, re, #64 - 14; \
+ interleave_op1(arg1); \
+ vshr.u64 RT4, re, #18; \
+ vshl.u64 RT5, re, #64 - 18; \
+ interleave_op2(arg2); \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, re, #41; \
+ vshl.u64 RT5, re, #64 - 41; \
+ vadd.u64 RT0, RT0, rw0; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, re; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, rf, rg; \
+ \
+ vadd.u64 RT1, RT1, rh; \
+ vshr.u64 RT2, ra, #28; \
+ vshl.u64 RT3, ra, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, ra, #34; \
+ vshl.u64 RT5, ra, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* h = Sum0 (a) + Maj (a, b, c); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, ra, #39; \
+ vshl.u64 RT5, ra, #64 - 39; \
+ veor.64 RT0, ra, rb; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rc, rb; \
+ vadd.u64 rd, rd, RT1; /* d+=t1; */ \
+ veor.64 rh, RT2, RT3; \
+ \
+ /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
+ vshr.u64 RT2, rd, #14; \
+ vshl.u64 RT3, rd, #64 - 14; \
+ vadd.u64 rh, rh, RT0; \
+ vshr.u64 RT4, rd, #18; \
+ vshl.u64 RT5, rd, #64 - 18; \
+ vadd.u64 rh, rh, RT1; /* h+=t1; */ \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rd, #41; \
+ vshl.u64 RT5, rd, #64 - 41; \
+ vadd.u64 RT0, RT0, rw1; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, rd; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, re, rf; \
+ \
+ vadd.u64 RT1, RT1, rg; \
+ vshr.u64 RT2, rh, #28; \
+ vshl.u64 RT3, rh, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, rh, #34; \
+ vshl.u64 RT5, rh, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* g = Sum0 (h) + Maj (h, a, b); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rh, #39; \
+ vshl.u64 RT5, rh, #64 - 39; \
+ veor.64 RT0, rh, ra; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rb, ra; \
+ vadd.u64 rc, rc, RT1; /* c+=t1; */ \
+ veor.64 rg, RT2, RT3;
+#define vadd_rg_RT0(rg) \
+ vadd.u64 rg, rg, RT0;
+#define vadd_rg_RT1(rg) \
+ vadd.u64 rg, rg, RT1; /* g+=t1; */
+
+.align 3
+.globl _gcry_sha512_transform_armv7_neon
+.type _gcry_sha512_transform_armv7_neon,%function;
+
+_gcry_sha512_transform_armv7_neon:
+ /* Input:
+ * %r0: SHA512_CONTEXT
+ * %r1: data
+ * %r2: u64 k[] constants
+ * %r3: nblks
+ */
+ push {%lr};
+
+ mov %lr, #0;
+
+ /* Load context to d0-d7 */
+ vld1.64 {RA-RD}, [%r0]!;
+ vld1.64 {RE-RH}, [%r0];
+ sub %r0, #(4*8);
+
+ /* Load input to w[16], d16-d31 */
+ /* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
+ vld1.64 {RW0-RW3}, [%r1]!;
+ vld1.64 {RW4-RW7}, [%r1]!;
+ vld1.64 {RW8-RW11}, [%r1]!;
+ vld1.64 {RW12-RW15}, [%r1]!;
+#ifdef __ARMEL__
+ /* byteswap */
+ vrev64.8 RW01q, RW01q;
+ vrev64.8 RW23q, RW23q;
+ vrev64.8 RW45q, RW45q;
+ vrev64.8 RW67q, RW67q;
+ vrev64.8 RW89q, RW89q;
+ vrev64.8 RW1011q, RW1011q;
+ vrev64.8 RW1213q, RW1213q;
+ vrev64.8 RW1415q, RW1415q;
+#endif
+
+ /* EABI says that d8-d15 must be preserved by callee. */
+ vpush {RT0-RT7};
+
+.Loop:
+ rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, RW23q, RW1415q, RW9, RW10, dummy, _);
+ b .Lenter_rounds;
+
+.Loop_rounds:
+ rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
+.Lenter_rounds:
+ rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4, RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
+ rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6, RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
+ rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8, RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
+ rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10, RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
+ rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12, RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
+ add %lr, #16;
+ rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14, RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
+ cmp %lr, #64;
+ rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0, RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
+ bne .Loop_rounds;
+
+ subs %r3, #1;
+
+ rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, vadd_RT01q, RW1415q, dummy, _);
+ rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+ beq .Lhandle_tail;
+ vld1.64 {RW0-RW3}, [%r1]!;
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+#ifdef __ARMEL__
+ vrev64.8 RW01q, RW01q;
+ vrev64.8 RW23q, RW23q;
+#endif
+ vld1.64 {RW4-RW7}, [%r1]!;
+ rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, vadd_rg_RT0, RA, vadd_rg_RT1, RA);
+ rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+#ifdef __ARMEL__
+ vrev64.8 RW45q, RW45q;
+ vrev64.8 RW67q, RW67q;
+#endif
+ vld1.64 {RW8-RW11}, [%r1]!;
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+#ifdef __ARMEL__
+ vrev64.8 RW89q, RW89q;
+ vrev64.8 RW1011q, RW1011q;
+#endif
+ vld1.64 {RW12-RW15}, [%r1]!;
+ vadd_rg_RT0(RA);
+ vadd_rg_RT1(RA);
+
+ /* Load context */
+ vld1.64 {RT0-RT3}, [%r0]!;
+ vld1.64 {RT4-RT7}, [%r0];
+ sub %r0, #(4*8);
+
+#ifdef __ARMEL__
+ vrev64.8 RW1213q, RW1213q;
+ vrev64.8 RW1415q, RW1415q;
+#endif
+
+ vadd.u64 RA, RT0;
+ vadd.u64 RB, RT1;
+ vadd.u64 RC, RT2;
+ vadd.u64 RD, RT3;
+ vadd.u64 RE, RT4;
+ vadd.u64 RF, RT5;
+ vadd.u64 RG, RT6;
+ vadd.u64 RH, RT7;
+
+ /* Store the first half of context */
+ vst1.64 {RA-RD}, [%r0]!;
+ sub RK, $(8*80);
+ vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
+ mov %lr, #0;
+ sub %r0, #(4*8);
+
+ b .Loop;
+.ltorg
+
+.Lhandle_tail:
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+ rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, vadd_rg_RT0, RA, vadd_rg_RT1, RA);
+ rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+
+ /* Load context to d16-d23 */
+ vld1.64 {RW0-RW3}, [%r0]!;
+ vadd_rg_RT0(RA);
+ vld1.64 {RW4-RW7}, [%r0];
+ vadd_rg_RT1(RA);
+ sub %r0, #(4*8);
+
+ vadd.u64 RA, RW0;
+ vadd.u64 RB, RW1;
+ vadd.u64 RC, RW2;
+ vadd.u64 RD, RW3;
+ vadd.u64 RE, RW4;
+ vadd.u64 RF, RW5;
+ vadd.u64 RG, RW6;
+ vadd.u64 RH, RW7;
+
+ /* Store the first half of context */
+ vst1.64 {RA-RD}, [%r0]!;
+
+ /* Clear used registers */
+ /* d16-d31 */
+ veor.u64 RW01q, RW01q;
+ veor.u64 RW23q, RW23q;
+ veor.u64 RW45q, RW45q;
+ veor.u64 RW67q, RW67q;
+ vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
+ veor.u64 RW89q, RW89q;
+ veor.u64 RW1011q, RW1011q;
+ veor.u64 RW1213q, RW1213q;
+ veor.u64 RW1415q, RW1415q;
+ /* d8-d15 */
+ vpop {RT0-RT7};
+ /* d0-d7 (q0-q3) */
+ veor.u64 %q0, %q0;
+ veor.u64 %q1, %q1;
+ veor.u64 %q2, %q2;
+ veor.u64 %q3, %q3;
+
+ eor %r0, %r0;
+ pop {%pc};
+.size _gcry_sha512_transform_armv7_neon,.-_gcry_sha512_transform_armv7_neon;
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/sha512-avx-amd64.S b/comm/third_party/libgcrypt/cipher/sha512-avx-amd64.S
new file mode 100644
index 0000000000..75f7b07059
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha512-avx-amd64.S
@@ -0,0 +1,461 @@
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright (c) 2012, Intel Corporation
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the
+; distribution.
+;
+; * Neither the name of the Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived from
+; this software without specific prior written permission.
+;
+;
+; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ * by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA512)
+
+#include "asm-common-amd64.h"
+
+.intel_syntax noprefix
+
+.text
+
+/* Virtual Registers */
+#define msg rdi /* ARG1 */
+#define digest rsi /* ARG2 */
+#define msglen rdx /* ARG3 */
+#define T1 rcx
+#define T2 r8
+#define a_64 r9
+#define b_64 r10
+#define c_64 r11
+#define d_64 r12
+#define e_64 r13
+#define f_64 r14
+#define g_64 r15
+#define h_64 rbx
+#define tmp0 rax
+
+/*
+; Local variables (stack frame)
+; Note: frame_size must be an odd multiple of 8 bytes to XMM align RSP
+*/
+#define frame_W 0 /* Message Schedule */
+#define frame_W_size (80 * 8)
+#define frame_WK ((frame_W) + (frame_W_size)) /* W[t] + K[t] | W[t+1] + K[t+1] */
+#define frame_WK_size (2 * 8)
+#define frame_GPRSAVE ((frame_WK) + (frame_WK_size))
+#define frame_GPRSAVE_size (5 * 8)
+#define frame_size ((frame_GPRSAVE) + (frame_GPRSAVE_size))
+
+
+/* Useful QWORD "arrays" for simpler memory references */
+#define MSG(i) msg + 8*(i) /* Input message (arg1) */
+#define DIGEST(i) digest + 8*(i) /* Output Digest (arg2) */
+#define K_t(i) .LK512 + 8*(i) ADD_RIP /* SHA Constants (static mem) */
+#define W_t(i) rsp + frame_W + 8*(i) /* Message Schedule (stack frame) */
+#define WK_2(i) rsp + frame_WK + 8*((i) % 2) /* W[t]+K[t] (stack frame) */
+/* MSG, DIGEST, K_t, W_t are arrays */
+/* WK_2(t) points to 1 of 2 qwords at frame.WK depending on t being odd/even */
+
+#define RORQ(p1, p2) \
+ /* shld is faster than ror on Intel Sandybridge */ \
+ shld p1, p1, (64 - p2)
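+
+/* With both operands the same register, "shld x, x, (64 - n)" is a left
+ * rotate by (64 - n) bits and thus gives the same result as "ror x, n". */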
+
+#define SHA512_Round(t, a, b, c, d, e, f, g, h) \
+ /* Compute Round %%t */; \
+ mov T1, f /* T1 = f */; \
+ mov tmp0, e /* tmp = e */; \
+ xor T1, g /* T1 = f ^ g */; \
+ RORQ( tmp0, 23) /* 41 ; tmp = e ror 23 */; \
+ and T1, e /* T1 = (f ^ g) & e */; \
+ xor tmp0, e /* tmp = (e ror 23) ^ e */; \
+ xor T1, g /* T1 = ((f ^ g) & e) ^ g = CH(e,f,g) */; \
+ add T1, [WK_2(t)] /* W[t] + K[t] from message scheduler */; \
+ RORQ( tmp0, 4) /* 18 ; tmp = ((e ror 23) ^ e) ror 4 */; \
+ xor tmp0, e /* tmp = (((e ror 23) ^ e) ror 4) ^ e */; \
+ mov T2, a /* T2 = a */; \
+ add T1, h /* T1 = CH(e,f,g) + W[t] + K[t] + h */; \
+ RORQ( tmp0, 14) /* 14 ; tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e) */; \
+ add T1, tmp0 /* T1 = CH(e,f,g) + W[t] + K[t] + S1(e) */; \
+ mov tmp0, a /* tmp = a */; \
+ xor T2, c /* T2 = a ^ c */; \
+ and tmp0, c /* tmp = a & c */; \
+ and T2, b /* T2 = (a ^ c) & b */; \
+ xor T2, tmp0 /* T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c) */; \
+ mov tmp0, a /* tmp = a */; \
+ RORQ( tmp0, 5) /* 39 ; tmp = a ror 5 */; \
+ xor tmp0, a /* tmp = (a ror 5) ^ a */; \
+ add d, T1 /* e(next_state) = d + T1 */; \
+ RORQ( tmp0, 6) /* 34 ; tmp = ((a ror 5) ^ a) ror 6 */; \
+ xor tmp0, a /* tmp = (((a ror 5) ^ a) ror 6) ^ a */; \
+ lea h, [T1 + T2] /* a(next_state) = T1 + Maj(a,b,c) */; \
+ RORQ( tmp0, 28) /* 28 ; tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) */; \
+	add	h, tmp0    /* a(next_state) = T1 + Maj(a,b,c) + S0(a) */
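+
+/* The bracketed rotate counts in the round macros (41, 18, 14 for S1(e) and
+ * 39, 34, 28 for S0(a)) record the total rotation each intermediate value
+ * receives once the remaining RORQ steps are applied, i.e. the rotation
+ * constants of the SHA-512 Sigma functions. */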
+
+#define SHA512_2Sched_2Round_avx_PART1(t, a, b, c, d, e, f, g, h) \
+ /* \
+ ; Compute rounds %%t-2 and %%t-1 \
+ ; Compute message schedule QWORDS %%t and %%t+1 \
+ ; \
+ ; Two rounds are computed based on the values for K[t-2]+W[t-2] and \
+ ; K[t-1]+W[t-1] which were previously stored at WK_2 by the message \
+ ; scheduler. \
+ ; The two new schedule QWORDS are stored at [W_t(%%t)] and [W_t(%%t+1)]. \
+ ; They are then added to their respective SHA512 constants at \
+ ; [K_t(%%t)] and [K_t(%%t+1)] and stored at dqword [WK_2(%%t)] \
+; For brevity, the comments following vectored instructions only refer to \
+ ; the first of a pair of QWORDS. \
+; E.g. XMM4=W[t-2] really means XMM4={W[t-2]|W[t-1]} \
+ ; The computation of the message schedule and the rounds are tightly \
+ ; stitched to take advantage of instruction-level parallelism. \
+ ; For clarity, integer instructions (for the rounds calculation) are indented \
+ ; by one tab. Vectored instructions (for the message scheduler) are indented \
+ ; by two tabs. \
+ */ \
+ \
+ vmovdqa xmm4, [W_t(t-2)] /* XMM4 = W[t-2] */; \
+ vmovdqu xmm5, [W_t(t-15)] /* XMM5 = W[t-15] */; \
+ mov T1, f; \
+ vpsrlq xmm0, xmm4, 61 /* XMM0 = W[t-2]>>61 */; \
+ mov tmp0, e; \
+ vpsrlq xmm6, xmm5, 1 /* XMM6 = W[t-15]>>1 */; \
+ xor T1, g; \
+ RORQ( tmp0, 23) /* 41 */; \
+ vpsrlq xmm1, xmm4, 19 /* XMM1 = W[t-2]>>19 */; \
+ and T1, e; \
+ xor tmp0, e; \
+ vpxor xmm0, xmm0, xmm1 /* XMM0 = W[t-2]>>61 ^ W[t-2]>>19 */; \
+ xor T1, g; \
+ add T1, [WK_2(t)]; \
+ vpsrlq xmm7, xmm5, 8 /* XMM7 = W[t-15]>>8 */; \
+ RORQ( tmp0, 4) /* 18 */; \
+ vpsrlq xmm2, xmm4, 6 /* XMM2 = W[t-2]>>6 */; \
+ xor tmp0, e; \
+ mov T2, a; \
+ add T1, h; \
+ vpxor xmm6, xmm6, xmm7 /* XMM6 = W[t-15]>>1 ^ W[t-15]>>8 */; \
+ RORQ( tmp0, 14) /* 14 */; \
+ add T1, tmp0; \
+ vpsrlq xmm8, xmm5, 7 /* XMM8 = W[t-15]>>7 */; \
+ mov tmp0, a; \
+ xor T2, c; \
+ vpsllq xmm3, xmm4, (64-61) /* XMM3 = W[t-2]<<3 */; \
+ and tmp0, c; \
+ and T2, b; \
+ vpxor xmm2, xmm2, xmm3 /* XMM2 = W[t-2]>>6 ^ W[t-2]<<3 */; \
+ xor T2, tmp0; \
+ mov tmp0, a; \
+ vpsllq xmm9, xmm5, (64-1) /* XMM9 = W[t-15]<<63 */; \
+ RORQ( tmp0, 5) /* 39 */; \
+ vpxor xmm8, xmm8, xmm9 /* XMM8 = W[t-15]>>7 ^ W[t-15]<<63 */; \
+ xor tmp0, a; \
+ add d, T1; \
+ RORQ( tmp0, 6) /* 34 */; \
+ xor tmp0, a; \
+ vpxor xmm6, xmm6, xmm8 /* XMM6 = W[t-15]>>1 ^ W[t-15]>>8 ^ W[t-15]>>7 ^ W[t-15]<<63 */; \
+ lea h, [T1 + T2]; \
+ RORQ( tmp0, 28) /* 28 */; \
+ vpsllq xmm4, xmm4, (64-19) /* XMM4 = W[t-2]<<25 */; \
+ add h, tmp0
+
+#define SHA512_2Sched_2Round_avx_PART2(t, a, b, c, d, e, f, g, h) \
+ vpxor xmm0, xmm0, xmm4 /* XMM0 = W[t-2]>>61 ^ W[t-2]>>19 ^ W[t-2]<<25 */; \
+ mov T1, f; \
+ vpxor xmm0, xmm0, xmm2 /* XMM0 = s1(W[t-2]) */; \
+ mov tmp0, e; \
+ xor T1, g; \
+ vpaddq xmm0, xmm0, [W_t(t-16)] /* XMM0 = s1(W[t-2]) + W[t-16] */; \
+ vmovdqu xmm1, [W_t(t- 7)] /* XMM1 = W[t-7] */; \
+ RORQ( tmp0, 23) /* 41 */; \
+ and T1, e; \
+ xor tmp0, e; \
+ xor T1, g; \
+ vpsllq xmm5, xmm5, (64-8) /* XMM5 = W[t-15]<<56 */; \
+ add T1, [WK_2(t+1)]; \
+ vpxor xmm6, xmm6, xmm5 /* XMM6 = s0(W[t-15]) */; \
+ RORQ( tmp0, 4) /* 18 */; \
+ vpaddq xmm0, xmm0, xmm6 /* XMM0 = s1(W[t-2]) + W[t-16] + s0(W[t-15]) */; \
+ xor tmp0, e; \
+ vpaddq xmm0, xmm0, xmm1 /* XMM0 = W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16] */; \
+ mov T2, a; \
+ add T1, h; \
+ RORQ( tmp0, 14) /* 14 */; \
+ add T1, tmp0; \
+ vmovdqa [W_t(t)], xmm0 /* Store W[t] */; \
+ vpaddq xmm0, xmm0, [K_t(t)] /* Compute W[t]+K[t] */; \
+ vmovdqa [WK_2(t)], xmm0 /* Store W[t]+K[t] for next rounds */; \
+ mov tmp0, a; \
+ xor T2, c; \
+ and tmp0, c; \
+ and T2, b; \
+ xor T2, tmp0; \
+ mov tmp0, a; \
+ RORQ( tmp0, 5) /* 39 */; \
+ xor tmp0, a; \
+ add d, T1; \
+ RORQ( tmp0, 6) /* 34 */; \
+ xor tmp0, a; \
+ lea h, [T1 + T2]; \
+ RORQ( tmp0, 28) /* 28 */; \
+ add h, tmp0
+
+#define SHA512_2Sched_2Round_avx(t, a, b, c, d, e, f, g, h) \
+ SHA512_2Sched_2Round_avx_PART1(t, a, b, c, d, e, f, g, h); \
+ SHA512_2Sched_2Round_avx_PART2(t, h, a, b, c, d, e, f, g)
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; void sha512_avx(const void* M, void* D, uint64_t L);
+; Purpose: Updates the SHA512 digest stored at D with the message stored in M.
+; The size of the message pointed to by M must be an integer multiple of SHA512
+; message blocks.
+; L is the message length in SHA512 blocks
+*/
+.globl _gcry_sha512_transform_amd64_avx
+ELF(.type _gcry_sha512_transform_amd64_avx,@function;)
+.align 16
+_gcry_sha512_transform_amd64_avx:
+ CFI_STARTPROC()
+ xor eax, eax
+
+ cmp msglen, 0
+ je .Lnowork
+
+ vzeroupper
+
+ /* Allocate Stack Space */
+ sub rsp, frame_size
+ CFI_ADJUST_CFA_OFFSET(frame_size);
+
+ /* Save GPRs */
+ mov [rsp + frame_GPRSAVE + 8 * 0], rbx
+ mov [rsp + frame_GPRSAVE + 8 * 1], r12
+ mov [rsp + frame_GPRSAVE + 8 * 2], r13
+ mov [rsp + frame_GPRSAVE + 8 * 3], r14
+ mov [rsp + frame_GPRSAVE + 8 * 4], r15
+ CFI_REL_OFFSET(rbx, frame_GPRSAVE + 8 * 0);
+ CFI_REL_OFFSET(r12, frame_GPRSAVE + 8 * 1);
+ CFI_REL_OFFSET(r13, frame_GPRSAVE + 8 * 2);
+ CFI_REL_OFFSET(r14, frame_GPRSAVE + 8 * 3);
+ CFI_REL_OFFSET(r15, frame_GPRSAVE + 8 * 4);
+
+.Lupdateblock:
+
+ /* Load state variables */
+ mov a_64, [DIGEST(0)]
+ mov b_64, [DIGEST(1)]
+ mov c_64, [DIGEST(2)]
+ mov d_64, [DIGEST(3)]
+ mov e_64, [DIGEST(4)]
+ mov f_64, [DIGEST(5)]
+ mov g_64, [DIGEST(6)]
+ mov h_64, [DIGEST(7)]
+
+ /* BSWAP 2 QWORDS */
+ vmovdqa xmm1, [.LXMM_QWORD_BSWAP ADD_RIP]
+ vmovdqu xmm0, [MSG(0)]
+ vpshufb xmm0, xmm0, xmm1 /* BSWAP */
+ vmovdqa [W_t(0)], xmm0 /* Store Scheduled Pair */
+ vpaddq xmm0, xmm0, [K_t(0)] /* Compute W[t]+K[t] */
+ vmovdqa [WK_2(0)], xmm0 /* Store into WK for rounds */
+
+ #define T_2_14(t, a, b, c, d, e, f, g, h) \
+ /* BSWAP 2 QWORDS, Compute 2 Rounds */; \
+ vmovdqu xmm0, [MSG(t)]; \
+ vpshufb xmm0, xmm0, xmm1 /* BSWAP */; \
+ SHA512_Round(((t) - 2), a##_64, b##_64, c##_64, d##_64, \
+ e##_64, f##_64, g##_64, h##_64); \
+ vmovdqa [W_t(t)], xmm0 /* Store Scheduled Pair */; \
+ vpaddq xmm0, xmm0, [K_t(t)] /* Compute W[t]+K[t] */; \
+ SHA512_Round(((t) - 1), h##_64, a##_64, b##_64, c##_64, \
+ d##_64, e##_64, f##_64, g##_64); \
+ vmovdqa [WK_2(t)], xmm0 /* W[t]+K[t] into WK */
+
+ #define T_16_78(t, a, b, c, d, e, f, g, h) \
+ SHA512_2Sched_2Round_avx((t), a##_64, b##_64, c##_64, d##_64, \
+ e##_64, f##_64, g##_64, h##_64)
+
+ #define T_80(t, a, b, c, d, e, f, g, h) \
+ /* Compute 2 Rounds */; \
+ SHA512_Round((t - 2), a##_64, b##_64, c##_64, d##_64, \
+ e##_64, f##_64, g##_64, h##_64); \
+ SHA512_Round((t - 1), h##_64, a##_64, b##_64, c##_64, \
+ d##_64, e##_64, f##_64, g##_64)
+
+ T_2_14(2, a, b, c, d, e, f, g, h)
+ T_2_14(4, g, h, a, b, c, d, e, f)
+ T_2_14(6, e, f, g, h, a, b, c, d)
+ T_2_14(8, c, d, e, f, g, h, a, b)
+ T_2_14(10, a, b, c, d, e, f, g, h)
+ T_2_14(12, g, h, a, b, c, d, e, f)
+ T_2_14(14, e, f, g, h, a, b, c, d)
+ T_16_78(16, c, d, e, f, g, h, a, b)
+ T_16_78(18, a, b, c, d, e, f, g, h)
+ T_16_78(20, g, h, a, b, c, d, e, f)
+ T_16_78(22, e, f, g, h, a, b, c, d)
+ T_16_78(24, c, d, e, f, g, h, a, b)
+ T_16_78(26, a, b, c, d, e, f, g, h)
+ T_16_78(28, g, h, a, b, c, d, e, f)
+ T_16_78(30, e, f, g, h, a, b, c, d)
+ T_16_78(32, c, d, e, f, g, h, a, b)
+ T_16_78(34, a, b, c, d, e, f, g, h)
+ T_16_78(36, g, h, a, b, c, d, e, f)
+ T_16_78(38, e, f, g, h, a, b, c, d)
+ T_16_78(40, c, d, e, f, g, h, a, b)
+ T_16_78(42, a, b, c, d, e, f, g, h)
+ T_16_78(44, g, h, a, b, c, d, e, f)
+ T_16_78(46, e, f, g, h, a, b, c, d)
+ T_16_78(48, c, d, e, f, g, h, a, b)
+ T_16_78(50, a, b, c, d, e, f, g, h)
+ T_16_78(52, g, h, a, b, c, d, e, f)
+ T_16_78(54, e, f, g, h, a, b, c, d)
+ T_16_78(56, c, d, e, f, g, h, a, b)
+ T_16_78(58, a, b, c, d, e, f, g, h)
+ T_16_78(60, g, h, a, b, c, d, e, f)
+ T_16_78(62, e, f, g, h, a, b, c, d)
+ T_16_78(64, c, d, e, f, g, h, a, b)
+ T_16_78(66, a, b, c, d, e, f, g, h)
+ T_16_78(68, g, h, a, b, c, d, e, f)
+ T_16_78(70, e, f, g, h, a, b, c, d)
+ T_16_78(72, c, d, e, f, g, h, a, b)
+ T_16_78(74, a, b, c, d, e, f, g, h)
+ T_16_78(76, g, h, a, b, c, d, e, f)
+ T_16_78(78, e, f, g, h, a, b, c, d)
+ T_80(80, c, d, e, f, g, h, a, b)
+
+ /* Update digest */
+ add [DIGEST(0)], a_64
+ add [DIGEST(1)], b_64
+ add [DIGEST(2)], c_64
+ add [DIGEST(3)], d_64
+ add [DIGEST(4)], e_64
+ add [DIGEST(5)], f_64
+ add [DIGEST(6)], g_64
+ add [DIGEST(7)], h_64
+
+ /* Advance to next message block */
+ add msg, 16*8
+ dec msglen
+ jnz .Lupdateblock
+
+ /* Restore GPRs */
+ mov rbx, [rsp + frame_GPRSAVE + 8 * 0]
+ mov r12, [rsp + frame_GPRSAVE + 8 * 1]
+ mov r13, [rsp + frame_GPRSAVE + 8 * 2]
+ mov r14, [rsp + frame_GPRSAVE + 8 * 3]
+ mov r15, [rsp + frame_GPRSAVE + 8 * 4]
+ CFI_RESTORE(rbx)
+ CFI_RESTORE(r12)
+ CFI_RESTORE(r13)
+ CFI_RESTORE(r14)
+ CFI_RESTORE(r15)
+
+ vzeroall
+
+ /* Burn stack */
+ mov eax, 0
+.Lerase_stack:
+ vmovdqu [rsp + rax], ymm0
+ add eax, 32
+ cmp eax, frame_W_size
+ jne .Lerase_stack
+ vmovdqu [rsp + frame_WK], xmm0
+ xor eax, eax
+
+ /* Restore Stack Pointer */
+ add rsp, frame_size
+ CFI_ADJUST_CFA_OFFSET(-frame_size);
+
+.Lnowork:
+ ret
+ CFI_ENDPROC()
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Binary Data
+*/
+
+.align 16
+
+/* Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. */
+.LXMM_QWORD_BSWAP:
+ .octa 0x08090a0b0c0d0e0f0001020304050607
+
+/* K[t] used in SHA512 hashing */
+.LK512:
+ .quad 0x428a2f98d728ae22,0x7137449123ef65cd
+ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+ .quad 0x3956c25bf348b538,0x59f111f1b605d019
+ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+ .quad 0xd807aa98a3030242,0x12835b0145706fbe
+ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+ .quad 0x9bdc06a725c71235,0xc19bf174cf692694
+ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+ .quad 0x983e5152ee66dfab,0xa831c66d2db43210
+ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+ .quad 0x06ca6351e003826f,0x142929670a0e6e70
+ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+ .quad 0x81c2c92e47edaee6,0x92722c851482353b
+ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+ .quad 0xd192e819d6ef5218,0xd69906245565a910
+ .quad 0xf40e35855771202a,0x106aa07032bbd1b8
+ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+ .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+ .quad 0x90befffa23631e28,0xa4506cebde82bde9
+ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+ .quad 0xca273eceea26619c,0xd186b8c721c0c207
+ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+ .quad 0x113f9804bef90dae,0x1b710b35131c471b
+ .quad 0x28db77f523047d84,0x32caab7b40c72493
+ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+
+#endif
+#endif
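
For reference, the two-rounds-per-iteration structure driven by the T_2_14/T_16_78/T_80 macros above computes the standard FIPS 180-4 SHA-512 round; instead of rotating the working state, the assembly renames the a..h registers, which is why successive macro invocations pass them in shifted order. A minimal scalar C sketch of a single round (illustrative only; the function and helper names below are not part of the patch):

#include <stdint.h>

/* Rotate right by n bits, 0 < n < 64 (illustrative helper). */
static inline uint64_t ror64_sketch(uint64_t x, unsigned n)
{
  return (x >> n) | (x << (64 - n));
}

/* One SHA-512 round: s[0..7] = {a,b,c,d,e,f,g,h}, kt = K[t], wt = W[t]. */
static void sha512_round_sketch(uint64_t s[8], uint64_t kt, uint64_t wt)
{
  uint64_t a = s[0], b = s[1], c = s[2], d = s[3];
  uint64_t e = s[4], f = s[5], g = s[6], h = s[7];
  uint64_t sum1 = ror64_sketch(e, 14) ^ ror64_sketch(e, 18) ^ ror64_sketch(e, 41);
  uint64_t sum0 = ror64_sketch(a, 28) ^ ror64_sketch(a, 34) ^ ror64_sketch(a, 39);
  uint64_t ch   = (e & f) ^ (~e & g);
  uint64_t maj  = (a & b) ^ (a & c) ^ (b & c);
  uint64_t t1   = h + sum1 + ch + kt + wt;
  uint64_t t2   = sum0 + maj;
  s[7] = g; s[6] = f; s[5] = e; s[4] = d + t1;
  s[3] = c; s[2] = b; s[1] = a; s[0] = t1 + t2;
}

The rotate amounts (14, 18, 41 and 28, 34, 39) match the composed ror sequences in SHA512_Round, and the precomputed W[t]+K[t] values in the WK_2 slots correspond to the kt + wt term here.
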
diff --git a/comm/third_party/libgcrypt/cipher/sha512-avx2-bmi2-amd64.S b/comm/third_party/libgcrypt/cipher/sha512-avx2-bmi2-amd64.S
new file mode 100644
index 0000000000..7f119e6c10
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha512-avx2-bmi2-amd64.S
@@ -0,0 +1,502 @@
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright (c) 2012, Intel Corporation
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the
+; distribution.
+;
+; * Neither the name of the Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived from
+; this software without specific prior written permission.
+;
+;
+; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This code schedules 1 block at a time, with 4 lanes per block
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ * by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+ defined(USE_SHA512)
+
+#include "asm-common-amd64.h"
+
+.intel_syntax noprefix
+
+.text
+
+/* Virtual Registers */
+#define Y_0 ymm4
+#define Y_1 ymm5
+#define Y_2 ymm6
+#define Y_3 ymm7
+
+#define YTMP0 ymm0
+#define YTMP1 ymm1
+#define YTMP2 ymm2
+#define YTMP3 ymm3
+#define YTMP4 ymm8
+#define XFER YTMP0
+
+#define BYTE_FLIP_MASK ymm9
+#define MASK_YMM_LO ymm10
+#define MASK_YMM_LOx xmm10
+
+#define INP rdi /* 1st arg */
+#define CTX rsi /* 2nd arg */
+#define NUM_BLKS rdx /* 3rd arg */
+#define c rcx
+#define d r8
+#define e rdx
+#define y3 rdi
+
+#define TBL rbp
+
+#define a rax
+#define b rbx
+
+#define f r9
+#define g r10
+#define h r11
+
+#define T1 r12
+#define y0 r13
+#define y1 r14
+#define y2 r15
+
+#define y4 r12
+
+/* Local variables (stack frame) */
+#define frame_XFER 0
+#define frame_XFER_size (4*4*8)
+#define frame_SRND (frame_XFER + frame_XFER_size)
+#define frame_SRND_size (1*8)
+#define frame_INP (frame_SRND + frame_SRND_size)
+#define frame_INP_size (1*8)
+#define frame_NBLKS (frame_INP + frame_INP_size)
+#define frame_NBLKS_size (1*8)
+#define frame_RSPSAVE (frame_NBLKS + frame_NBLKS_size)
+#define frame_RSPSAVE_size (1*8)
+#define frame_GPRSAVE (frame_RSPSAVE + frame_RSPSAVE_size)
+#define frame_GPRSAVE_size (6*8)
+#define frame_size (frame_GPRSAVE + frame_GPRSAVE_size)
+
+#define VMOVDQ vmovdqu /*; assume buffers not aligned */
+
+/* addm [mem], reg */
+/* Add reg to mem using reg-mem add and store */
+#define addm(p1, p2) \
+ add p2, p1; \
+ mov p1, p2;
+
+
+/* COPY_YMM_AND_BSWAP ymm, [mem], byte_flip_mask */
+/* Load ymm with mem and byte swap each qword */
+#define COPY_YMM_AND_BSWAP(p1, p2, p3) \
+ VMOVDQ p1, p2; \
+ vpshufb p1, p1, p3
+
+/* %macro MY_VPALIGNR YDST, YSRC1, YSRC2, RVAL */
+/* YDST = {YSRC1, YSRC2} >> RVAL*8 */
+#define MY_VPALIGNR(YDST, YSRC1, YSRC2, RVAL) \
+ vperm2i128 YDST, YSRC1, YSRC2, 0x3 /* YDST = {YS1_LO, YS2_HI} */; \
+ vpalignr YDST, YDST, YSRC2, RVAL /* YDST = {YDS1, YS2} >> RVAL*8 */
+
+#define ONE_ROUND_PART1(XFERIN, a, b, c, d, e, f, g, h) \
+ /* h += Sum1 (e) + Ch (e, f, g) + (k[t] + w[0]); \
+ * d += h; \
+ * h += Sum0 (a) + Maj (a, b, c); \
+ * \
+ * Ch(x, y, z) => ((x & y) + (~x & z)) \
+ * Maj(x, y, z) => ((x & y) + (z & (x ^ y))) \
+ */ \
+ \
+ mov y3, e; \
+ add h, [XFERIN]; \
+ and y3, f; \
+ rorx y0, e, 41; \
+ rorx y1, e, 18; \
+ lea h, [h + y3]; \
+ andn y3, e, g; \
+ rorx T1, a, 34; \
+ xor y0, y1; \
+ lea h, [h + y3]
+
+#define ONE_ROUND_PART2(a, b, c, d, e, f, g, h) \
+ rorx y2, a, 39; \
+ rorx y1, e, 14; \
+ mov y3, a; \
+ xor T1, y2; \
+ xor y0, y1; \
+ xor y3, b; \
+ lea h, [h + y0]; \
+ mov y0, a; \
+ rorx y2, a, 28; \
+ add d, h; \
+ and y3, c; \
+ xor T1, y2; \
+ lea h, [h + y3]; \
+ lea h, [h + T1]; \
+ and y0, b; \
+ lea h, [h + y0]
+
+#define ONE_ROUND(XFERIN, a, b, c, d, e, f, g, h) \
+ ONE_ROUND_PART1(XFERIN, a, b, c, d, e, f, g, h); \
+ ONE_ROUND_PART2(a, b, c, d, e, f, g, h)
+
+#define FOUR_ROUNDS_AND_SCHED(X, Y_0, Y_1, Y_2, Y_3, a, b, c, d, e, f, g, h) \
+ /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 0 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+ /* Extract w[t-7] */; \
+ MY_VPALIGNR( YTMP0, Y_3, Y_2, 8) /* YTMP0 = W[-7] */; \
+ /* Calculate w[t-16] + w[t-7] */; \
+ vpaddq YTMP0, YTMP0, Y_0 /* YTMP0 = W[-7] + W[-16] */; \
+ /* Extract w[t-15] */; \
+ MY_VPALIGNR( YTMP1, Y_1, Y_0, 8) /* YTMP1 = W[-15] */; \
+ \
+ /* Calculate sigma0 */; \
+ \
+ /* Calculate w[t-15] ror 1 */; \
+ vpsrlq YTMP2, YTMP1, 1; \
+ vpsllq YTMP3, YTMP1, (64-1); \
+ vpor YTMP3, YTMP3, YTMP2 /* YTMP3 = W[-15] ror 1 */; \
+ /* Calculate w[t-15] shr 7 */; \
+ vpsrlq YTMP4, YTMP1, 7 /* YTMP4 = W[-15] >> 7 */; \
+ \
+ ONE_ROUND(rsp+frame_XFER+0*8+X*32, a, b, c, d, e, f, g, h); \
+ \
+ /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+ /* Calculate w[t-15] ror 8 */; \
+ vpsrlq YTMP2, YTMP1, 8; \
+ vpsllq YTMP1, YTMP1, (64-8); \
+ vpor YTMP1, YTMP1, YTMP2 /* YTMP1 = W[-15] ror 8 */; \
+ /* XOR the three components */; \
+ vpxor YTMP3, YTMP3, YTMP4 /* YTMP3 = W[-15] ror 1 ^ W[-15] >> 7 */; \
+ vpxor YTMP1, YTMP3, YTMP1 /* YTMP1 = s0 */; \
+ \
+ /* Add three components, w[t-16], w[t-7] and sigma0 */; \
+ vpaddq YTMP0, YTMP0, YTMP1 /* YTMP0 = W[-16] + W[-7] + s0 */; \
+ /* Move to appropriate lanes for calculating w[16] and w[17] */; \
+ vperm2i128 Y_0, YTMP0, YTMP0, 0x0 /* Y_0 = W[-16] + W[-7] + s0 {BABA} */; \
+ /* Move to appropriate lanes for calculating w[18] and w[19] */; \
+ vpand YTMP0, YTMP0, MASK_YMM_LO /* YTMP0 = W[-16] + W[-7] + s0 {DC00} */; \
+ \
+ /* Calculate w[16] and w[17] in both 128 bit lanes */; \
+ \
+ /* Calculate sigma1 for w[16] and w[17] on both 128 bit lanes */; \
+ vperm2i128 YTMP2, Y_3, Y_3, 0x11 /* YTMP2 = W[-2] {BABA} */; \
+ vpsrlq YTMP4, YTMP2, 6 /* YTMP4 = W[-2] >> 6 {BABA} */; \
+ \
+ ONE_ROUND(rsp+frame_XFER+1*8+X*32, h, a, b, c, d, e, f, g); \
+ \
+ /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+ vpsrlq YTMP3, YTMP2, 19 /* YTMP3 = W[-2] >> 19 {BABA} */; \
+ vpsllq YTMP1, YTMP2, (64-19) /* YTMP1 = W[-2] << 19 {BABA} */; \
+ vpor YTMP3, YTMP3, YTMP1 /* YTMP3 = W[-2] ror 19 {BABA} */; \
+ vpxor YTMP4, YTMP4, YTMP3 /* YTMP4 = W[-2] ror 19 ^ W[-2] >> 6 {BABA} */; \
+ vpsrlq YTMP3, YTMP2, 61 /* YTMP3 = W[-2] >> 61 {BABA} */; \
+ vpsllq YTMP1, YTMP2, (64-61) /* YTMP1 = W[-2] << 61 {BABA} */; \
+ vpor YTMP3, YTMP3, YTMP1 /* YTMP3 = W[-2] ror 61 {BABA} */; \
+ vpxor YTMP4, YTMP4, YTMP3 /* YTMP4 = s1 = (W[-2] ror 19) ^ (W[-2] ror 61) ^ (W[-2] >> 6) {BABA} */; \
+ \
+	/* Add sigma1 to the other components to get w[16] and w[17] */; \
+ vpaddq Y_0, Y_0, YTMP4 /* Y_0 = {W[1], W[0], W[1], W[0]} */; \
+ \
+ /* Calculate sigma1 for w[18] and w[19] for upper 128 bit lane */; \
+ vpsrlq YTMP4, Y_0, 6 /* YTMP4 = W[-2] >> 6 {DC--} */; \
+ \
+ ONE_ROUND(rsp+frame_XFER+2*8+X*32, g, h, a, b, c, d, e, f); \
+ \
+ /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+ vpsrlq YTMP3, Y_0, 19 /* YTMP3 = W[-2] >> 19 {DC--} */; \
+ vpsllq YTMP1, Y_0, (64-19) /* YTMP1 = W[-2] << 19 {DC--} */; \
+ vpor YTMP3, YTMP3, YTMP1 /* YTMP3 = W[-2] ror 19 {DC--} */; \
+ vpxor YTMP4, YTMP4, YTMP3 /* YTMP4 = W[-2] ror 19 ^ W[-2] >> 6 {DC--} */; \
+ vpsrlq YTMP3, Y_0, 61 /* YTMP3 = W[-2] >> 61 {DC--} */; \
+ vpsllq YTMP1, Y_0, (64-61) /* YTMP1 = W[-2] << 61 {DC--} */; \
+ vpor YTMP3, YTMP3, YTMP1 /* YTMP3 = W[-2] ror 61 {DC--} */; \
+ vpxor YTMP4, YTMP4, YTMP3 /* YTMP4 = s1 = (W[-2] ror 19) ^ (W[-2] ror 61) ^ (W[-2] >> 6) {DC--} */; \
+ \
+ /* Add the sigma0 + w[t-7] + w[t-16] for w[18] and w[19] to newly calculated sigma1 to get w[18] and w[19] */; \
+ vpaddq YTMP2, YTMP0, YTMP4 /* YTMP2 = {W[3], W[2], --, --} */; \
+ \
+	/* Form w[19], w[18], w[17], w[16] */; \
+ vpblendd Y_0, Y_0, YTMP2, 0xF0 /* Y_0 = {W[3], W[2], W[1], W[0]} */; \
+ \
+ ONE_ROUND_PART1(rsp+frame_XFER+3*8+X*32, f, g, h, a, b, c, d, e); \
+ vpaddq XFER, Y_0, [TBL + (4+X)*32]; \
+ vmovdqa [rsp + frame_XFER + X*32], XFER; \
+ ONE_ROUND_PART2(f, g, h, a, b, c, d, e)
+
+#define DO_4ROUNDS(X, a, b, c, d, e, f, g, h) \
+ ONE_ROUND(rsp+frame_XFER+0*8+X*32, a, b, c, d, e, f, g, h); \
+ ONE_ROUND(rsp+frame_XFER+1*8+X*32, h, a, b, c, d, e, f, g); \
+ ONE_ROUND(rsp+frame_XFER+2*8+X*32, g, h, a, b, c, d, e, f); \
+ ONE_ROUND(rsp+frame_XFER+3*8+X*32, f, g, h, a, b, c, d, e)
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; void sha512_rorx(const void* M, void* D, uint64_t L);
+; Purpose: Updates the SHA512 digest stored at D with the message stored in M.
+; The size of the message pointed to by M must be an integer multiple of the
+; SHA512 message block size (128 bytes).
+; L is the message length in SHA512 blocks.
+*/
+.globl _gcry_sha512_transform_amd64_avx2
+ELF(.type _gcry_sha512_transform_amd64_avx2,@function;)
+.align 16
+_gcry_sha512_transform_amd64_avx2:
+ CFI_STARTPROC()
+ xor eax, eax
+
+ cmp rdx, 0
+ je .Lnowork
+
+ vzeroupper
+
+ /* Allocate Stack Space */
+ mov rax, rsp
+ CFI_DEF_CFA_REGISTER(rax);
+ sub rsp, frame_size
+ and rsp, ~(0x40 - 1)
+ mov [rsp + frame_RSPSAVE], rax
+ CFI_CFA_ON_STACK(frame_RSPSAVE, 0)
+
+ /* Save GPRs */
+ mov [rsp + frame_GPRSAVE + 8 * 0], rbp
+ mov [rsp + frame_GPRSAVE + 8 * 1], rbx
+ mov [rsp + frame_GPRSAVE + 8 * 2], r12
+ mov [rsp + frame_GPRSAVE + 8 * 3], r13
+ mov [rsp + frame_GPRSAVE + 8 * 4], r14
+ mov [rsp + frame_GPRSAVE + 8 * 5], r15
+ CFI_REG_ON_STACK(rbp, frame_GPRSAVE + 8 * 0)
+ CFI_REG_ON_STACK(rbx, frame_GPRSAVE + 8 * 1)
+ CFI_REG_ON_STACK(r12, frame_GPRSAVE + 8 * 2)
+ CFI_REG_ON_STACK(r13, frame_GPRSAVE + 8 * 3)
+ CFI_REG_ON_STACK(r14, frame_GPRSAVE + 8 * 4)
+ CFI_REG_ON_STACK(r15, frame_GPRSAVE + 8 * 5)
+
+ mov [rsp + frame_NBLKS], NUM_BLKS
+
+ /*; load initial digest */
+ mov a,[8*0 + CTX]
+ mov b,[8*1 + CTX]
+ mov c,[8*2 + CTX]
+ mov d,[8*3 + CTX]
+ mov e,[8*4 + CTX]
+ mov f,[8*5 + CTX]
+ mov g,[8*6 + CTX]
+ mov h,[8*7 + CTX]
+
+ vmovdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP]
+ vmovdqa MASK_YMM_LO, [.LMASK_YMM_LO ADD_RIP]
+
+ lea TBL,[.LK512 ADD_RIP]
+
+	/*; byte swap first 16 qwords */
+ COPY_YMM_AND_BSWAP(Y_0, [INP + 0*32], BYTE_FLIP_MASK)
+ COPY_YMM_AND_BSWAP(Y_1, [INP + 1*32], BYTE_FLIP_MASK)
+ COPY_YMM_AND_BSWAP(Y_2, [INP + 2*32], BYTE_FLIP_MASK)
+ COPY_YMM_AND_BSWAP(Y_3, [INP + 3*32], BYTE_FLIP_MASK)
+
+ add INP, 128
+ mov [rsp + frame_INP], INP
+
+ vpaddq XFER, Y_0, [TBL + 0*32]
+ vmovdqa [rsp + frame_XFER + 0*32], XFER
+ vpaddq XFER, Y_1, [TBL + 1*32]
+ vmovdqa [rsp + frame_XFER + 1*32], XFER
+ vpaddq XFER, Y_2, [TBL + 2*32]
+ vmovdqa [rsp + frame_XFER + 2*32], XFER
+ vpaddq XFER, Y_3, [TBL + 3*32]
+ vmovdqa [rsp + frame_XFER + 3*32], XFER
+
+	/*; schedule 64 input qwords, by doing 4 iterations of 16 rounds each */
+ mov qword ptr [rsp + frame_SRND], 4
+
+.align 16
+.Loop0:
+ FOUR_ROUNDS_AND_SCHED(0, Y_0, Y_1, Y_2, Y_3, a, b, c, d, e, f, g, h)
+ FOUR_ROUNDS_AND_SCHED(1, Y_1, Y_2, Y_3, Y_0, e, f, g, h, a, b, c, d)
+ FOUR_ROUNDS_AND_SCHED(2, Y_2, Y_3, Y_0, Y_1, a, b, c, d, e, f, g, h)
+ FOUR_ROUNDS_AND_SCHED(3, Y_3, Y_0, Y_1, Y_2, e, f, g, h, a, b, c, d)
+ add TBL, 4*32
+
+ sub qword ptr [rsp + frame_SRND], 1
+ jne .Loop0
+
+ sub qword ptr [rsp + frame_NBLKS], 1
+ je .Ldone_hash
+
+ mov INP, [rsp + frame_INP]
+
+ lea TBL,[.LK512 ADD_RIP]
+
+ /* load next block and byte swap */
+ COPY_YMM_AND_BSWAP(Y_0, [INP + 0*32], BYTE_FLIP_MASK)
+ COPY_YMM_AND_BSWAP(Y_1, [INP + 1*32], BYTE_FLIP_MASK)
+ COPY_YMM_AND_BSWAP(Y_2, [INP + 2*32], BYTE_FLIP_MASK)
+ COPY_YMM_AND_BSWAP(Y_3, [INP + 3*32], BYTE_FLIP_MASK)
+
+ add INP, 128
+ mov [rsp + frame_INP], INP
+
+ DO_4ROUNDS(0, a, b, c, d, e, f, g, h)
+ vpaddq XFER, Y_0, [TBL + 0*32]
+ vmovdqa [rsp + frame_XFER + 0*32], XFER
+ DO_4ROUNDS(1, e, f, g, h, a, b, c, d)
+ vpaddq XFER, Y_1, [TBL + 1*32]
+ vmovdqa [rsp + frame_XFER + 1*32], XFER
+ DO_4ROUNDS(2, a, b, c, d, e, f, g, h)
+ vpaddq XFER, Y_2, [TBL + 2*32]
+ vmovdqa [rsp + frame_XFER + 2*32], XFER
+ DO_4ROUNDS(3, e, f, g, h, a, b, c, d)
+ vpaddq XFER, Y_3, [TBL + 3*32]
+ vmovdqa [rsp + frame_XFER + 3*32], XFER
+
+ addm([8*0 + CTX],a)
+ addm([8*1 + CTX],b)
+ addm([8*2 + CTX],c)
+ addm([8*3 + CTX],d)
+ addm([8*4 + CTX],e)
+ addm([8*5 + CTX],f)
+ addm([8*6 + CTX],g)
+ addm([8*7 + CTX],h)
+
+	/*; schedule 64 input qwords, by doing 4 iterations of 16 rounds each */
+ mov qword ptr [rsp + frame_SRND],4
+
+ jmp .Loop0
+
+.Ldone_hash:
+ vzeroall
+
+ DO_4ROUNDS(0, a, b, c, d, e, f, g, h)
+ vmovdqa [rsp + frame_XFER + 0*32], ymm0 /* burn stack */
+ DO_4ROUNDS(1, e, f, g, h, a, b, c, d)
+ vmovdqa [rsp + frame_XFER + 1*32], ymm0 /* burn stack */
+ DO_4ROUNDS(2, a, b, c, d, e, f, g, h)
+ vmovdqa [rsp + frame_XFER + 2*32], ymm0 /* burn stack */
+ DO_4ROUNDS(3, e, f, g, h, a, b, c, d)
+ vmovdqa [rsp + frame_XFER + 3*32], ymm0 /* burn stack */
+
+ addm([8*0 + CTX],a)
+ xor eax, eax /* burn stack */
+ addm([8*1 + CTX],b)
+ addm([8*2 + CTX],c)
+ addm([8*3 + CTX],d)
+ addm([8*4 + CTX],e)
+ addm([8*5 + CTX],f)
+ addm([8*6 + CTX],g)
+ addm([8*7 + CTX],h)
+
+ /* Restore GPRs */
+ mov rbp, [rsp + frame_GPRSAVE + 8 * 0]
+ mov rbx, [rsp + frame_GPRSAVE + 8 * 1]
+ mov r12, [rsp + frame_GPRSAVE + 8 * 2]
+ mov r13, [rsp + frame_GPRSAVE + 8 * 3]
+ mov r14, [rsp + frame_GPRSAVE + 8 * 4]
+ mov r15, [rsp + frame_GPRSAVE + 8 * 5]
+ CFI_RESTORE(rbp)
+ CFI_RESTORE(rbx)
+ CFI_RESTORE(r12)
+ CFI_RESTORE(r13)
+ CFI_RESTORE(r14)
+ CFI_RESTORE(r15)
+
+ /* Restore Stack Pointer */
+ mov rsp, [rsp + frame_RSPSAVE]
+ CFI_DEF_CFA_REGISTER(rsp)
+
+.Lnowork:
+ ret
+ CFI_ENDPROC()
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */
+/*;; Binary Data */
+
+.align 64
+/* K[t] used in SHA512 hashing */
+.LK512:
+ .quad 0x428a2f98d728ae22,0x7137449123ef65cd
+ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+ .quad 0x3956c25bf348b538,0x59f111f1b605d019
+ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+ .quad 0xd807aa98a3030242,0x12835b0145706fbe
+ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+ .quad 0x9bdc06a725c71235,0xc19bf174cf692694
+ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+ .quad 0x983e5152ee66dfab,0xa831c66d2db43210
+ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+ .quad 0x06ca6351e003826f,0x142929670a0e6e70
+ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+ .quad 0x81c2c92e47edaee6,0x92722c851482353b
+ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+ .quad 0xd192e819d6ef5218,0xd69906245565a910
+ .quad 0xf40e35855771202a,0x106aa07032bbd1b8
+ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+ .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+ .quad 0x90befffa23631e28,0xa4506cebde82bde9
+ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+ .quad 0xca273eceea26619c,0xd186b8c721c0c207
+ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+ .quad 0x113f9804bef90dae,0x1b710b35131c471b
+ .quad 0x28db77f523047d84,0x32caab7b40c72493
+ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+
+.align 32
+
+/* Mask for byte-swapping the qwords in a YMM register using vpshufb. */
+.LPSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
+ .octa 0x18191a1b1c1d1e1f1011121314151617
+
+.LMASK_YMM_LO: .octa 0x00000000000000000000000000000000
+ .octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+
+#endif
+#endif
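
The vpsrlq/vpsllq/vpor/vpxor sequences inside FOUR_ROUNDS_AND_SCHED above evaluate the SHA-512 message-schedule recurrence W[t] = W[t-16] + s0(W[t-15]) + W[t-7] + s1(W[t-2]), with s0(x) = (x ror 1) ^ (x ror 8) ^ (x >> 7) and s1(x) = (x ror 19) ^ (x ror 61) ^ (x >> 6), four schedule words at a time across the YMM lanes. A scalar C sketch of the same expansion (illustrative only; names are not from the patch):

#include <stdint.h>

static inline uint64_t ror64_sketch(uint64_t x, unsigned n)
{
  return (x >> n) | (x << (64 - n));
}

/* Expand 16 message words (already byte-swapped from big-endian) to 80. */
static void sha512_expand_sketch(const uint64_t m[16], uint64_t w[80])
{
  int t;
  for (t = 0; t < 16; t++)
    w[t] = m[t];
  for (t = 16; t < 80; t++)
    {
      uint64_t s0 = ror64_sketch(w[t - 15], 1) ^ ror64_sketch(w[t - 15], 8)
                    ^ (w[t - 15] >> 7);
      uint64_t s1 = ror64_sketch(w[t - 2], 19) ^ ror64_sketch(w[t - 2], 61)
                    ^ (w[t - 2] >> 6);
      w[t] = w[t - 16] + s0 + w[t - 7] + s1;
    }
}

Adding K[t] to the freshly scheduled words and spilling the sums to the frame_XFER slots (the vpaddq/vmovdqa pair at the end of the macro) lets each integer round consume a single memory operand, which is what ONE_ROUND_PART1 reads via [XFERIN].
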
diff --git a/comm/third_party/libgcrypt/cipher/sha512-ppc.c b/comm/third_party/libgcrypt/cipher/sha512-ppc.c
new file mode 100644
index 0000000000..31ea25bf9a
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha512-ppc.c
@@ -0,0 +1,969 @@
+/* sha512-ppc.c - PowerPC vcrypto implementation of SHA-512 transform
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \
+ defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
+ defined(USE_SHA512) && \
+ __GNUC__ >= 4
+
+#include <altivec.h>
+#include "bufhelp.h"
+
+
+typedef vector unsigned char vector16x_u8;
+typedef vector unsigned long long vector2x_u64;
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+static const u64 K[80] =
+ {
+ U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
+ U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
+ U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
+ U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
+ U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
+ U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
+ U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
+ U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
+ U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
+ U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
+ U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
+ U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
+ U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
+ U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
+ U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
+ U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
+ U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
+ U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
+ U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
+ U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
+ U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
+ U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
+ U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
+ U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
+ U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
+ U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
+ U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
+ U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
+ U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
+ U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
+ U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
+ U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
+ U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
+ U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
+ U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
+ U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
+ U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
+ U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
+ U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
+ U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
+ };
+
+
+static ASM_FUNC_ATTR_INLINE u64
+ror64 (u64 v, u64 shift)
+{
+ return (v >> (shift & 63)) ^ (v << ((64 - shift) & 63));
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+vec_rol_elems(vector2x_u64 v, unsigned int idx)
+{
+#ifndef WORDS_BIGENDIAN
+ return vec_sld (v, v, (16 - (8 * idx)) & 15);
+#else
+ return vec_sld (v, v, (8 * idx) & 15);
+#endif
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+vec_merge_idx0_elems(vector2x_u64 v0, vector2x_u64 v1)
+{
+ return vec_mergeh (v0, v1);
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+vec_vshasigma_u64(vector2x_u64 v, unsigned int a, unsigned int b)
+{
+ __asm__ ("vshasigmad %0,%1,%2,%3"
+ : "=v" (v)
+ : "v" (v), "g" (a), "g" (b)
+ : "memory");
+ return v;
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+vec_u64_load(unsigned long offset, const void *ptr)
+{
+ vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+ if (__builtin_constant_p (offset) && offset == 0)
+ __asm__ ("lxvd2x %x0,0,%1\n\t"
+ : "=wa" (vecu64)
+ : "r" ((uintptr_t)ptr)
+ : "memory");
+ else
+#endif
+ __asm__ ("lxvd2x %x0,%1,%2\n\t"
+ : "=wa" (vecu64)
+ : "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+#ifndef WORDS_BIGENDIAN
+ __asm__ ("xxswapd %x0, %x1"
+ : "=wa" (vecu64)
+ : "wa" (vecu64));
+#endif
+ return vecu64;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+vec_u64_store(vector2x_u64 vecu64, unsigned long offset, void *ptr)
+{
+#ifndef WORDS_BIGENDIAN
+ __asm__ ("xxswapd %x0, %x1"
+ : "=wa" (vecu64)
+ : "wa" (vecu64));
+#endif
+#if __GNUC__ >= 4
+ if (__builtin_constant_p (offset) && offset == 0)
+ __asm__ ("stxvd2x %x0,0,%1\n\t"
+ :
+ : "wa" (vecu64), "r" ((uintptr_t)ptr)
+ : "memory");
+ else
+#endif
+ __asm__ ("stxvd2x %x0,%1,%2\n\t"
+ :
+ : "wa" (vecu64), "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+}
+
+
+/* SHA2 round in vector registers */
+#define R(a,b,c,d,e,f,g,h,k,w) do \
+ { \
+ t1 = (h); \
+ t1 += ((k) + (w)); \
+ t1 += Cho((e),(f),(g)); \
+ t1 += Sum1((e)); \
+ t2 = Sum0((a)); \
+ t2 += Maj((a),(b),(c)); \
+ d += t1; \
+ h = t1 + t2; \
+ } while (0)
+
+#define Cho(b, c, d) (vec_sel(d, c, b))
+
+#define Maj(c, d, b) (vec_sel(c, b, c ^ d))
+
+#define Sum0(x) (vec_vshasigma_u64(x, 1, 0))
+
+#define Sum1(x) (vec_vshasigma_u64(x, 1, 15))
+
+
+/* Message expansion on general purpose registers */
+#define S0(x) (ror64 ((x), 1) ^ ror64 ((x), 8) ^ ((x) >> 7))
+#define S1(x) (ror64 ((x), 19) ^ ror64 ((x), 61) ^ ((x) >> 6))
+
+#define I(i) ( w[i] = buf_get_be64(data + i * 8) )
+#define WN(i) ({ w[i&0x0f] += w[(i-7) &0x0f]; \
+ w[i&0x0f] += S0(w[(i-15)&0x0f]); \
+ w[i&0x0f] += S1(w[(i-2) &0x0f]); \
+ w[i&0x0f]; })
+#define W(i) ({ u64 r = w[i&0x0f]; WN(i); r; })
+#define L(i) w[i&0x0f]
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_sha512_transform_ppc8(u64 state[8],
+ const unsigned char *data, size_t nblks)
+{
+  /* GPRs are used for message expansion, as vector-intrinsics-based
+   * expansion generates slower code. */
+ vector2x_u64 h0, h1, h2, h3, h4, h5, h6, h7;
+ vector2x_u64 a, b, c, d, e, f, g, h, t1, t2;
+ u64 w[16];
+
+ h0 = vec_u64_load (8 * 0, (unsigned long long *)state);
+ h1 = vec_rol_elems (h0, 1);
+ h2 = vec_u64_load (8 * 2, (unsigned long long *)state);
+ h3 = vec_rol_elems (h2, 1);
+ h4 = vec_u64_load (8 * 4, (unsigned long long *)state);
+ h5 = vec_rol_elems (h4, 1);
+ h6 = vec_u64_load (8 * 6, (unsigned long long *)state);
+ h7 = vec_rol_elems (h6, 1);
+
+ while (nblks >= 2)
+ {
+ a = h0;
+ b = h1;
+ c = h2;
+ d = h3;
+ e = h4;
+ f = h5;
+ g = h6;
+ h = h7;
+
+ I(0); I(1); I(2); I(3);
+ I(4); I(5); I(6); I(7);
+ I(8); I(9); I(10); I(11);
+ I(12); I(13); I(14); I(15);
+ data += 128;
+ R(a, b, c, d, e, f, g, h, K[0], W(0));
+ R(h, a, b, c, d, e, f, g, K[1], W(1));
+ R(g, h, a, b, c, d, e, f, K[2], W(2));
+ R(f, g, h, a, b, c, d, e, K[3], W(3));
+ R(e, f, g, h, a, b, c, d, K[4], W(4));
+ R(d, e, f, g, h, a, b, c, K[5], W(5));
+ R(c, d, e, f, g, h, a, b, K[6], W(6));
+ R(b, c, d, e, f, g, h, a, K[7], W(7));
+ R(a, b, c, d, e, f, g, h, K[8], W(8));
+ R(h, a, b, c, d, e, f, g, K[9], W(9));
+ R(g, h, a, b, c, d, e, f, K[10], W(10));
+ R(f, g, h, a, b, c, d, e, K[11], W(11));
+ R(e, f, g, h, a, b, c, d, K[12], W(12));
+ R(d, e, f, g, h, a, b, c, K[13], W(13));
+ R(c, d, e, f, g, h, a, b, K[14], W(14));
+ R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], W(48));
+ R(h, a, b, c, d, e, f, g, K[49], W(49));
+ R(g, h, a, b, c, d, e, f, K[50], W(50));
+ R(f, g, h, a, b, c, d, e, K[51], W(51));
+ R(e, f, g, h, a, b, c, d, K[52], W(52));
+ R(d, e, f, g, h, a, b, c, K[53], W(53));
+ R(c, d, e, f, g, h, a, b, K[54], W(54));
+ R(b, c, d, e, f, g, h, a, K[55], W(55));
+ R(a, b, c, d, e, f, g, h, K[56], W(56));
+ R(h, a, b, c, d, e, f, g, K[57], W(57));
+ R(g, h, a, b, c, d, e, f, K[58], W(58));
+ R(f, g, h, a, b, c, d, e, K[59], W(59));
+ R(e, f, g, h, a, b, c, d, K[60], W(60));
+ R(d, e, f, g, h, a, b, c, K[61], W(61));
+ R(c, d, e, f, g, h, a, b, K[62], W(62));
+ R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+ R(a, b, c, d, e, f, g, h, K[64], L(64));
+ R(h, a, b, c, d, e, f, g, K[65], L(65));
+ R(g, h, a, b, c, d, e, f, K[66], L(66));
+ R(f, g, h, a, b, c, d, e, K[67], L(67));
+ I(0); I(1); I(2); I(3);
+ R(e, f, g, h, a, b, c, d, K[68], L(68));
+ R(d, e, f, g, h, a, b, c, K[69], L(69));
+ R(c, d, e, f, g, h, a, b, K[70], L(70));
+ R(b, c, d, e, f, g, h, a, K[71], L(71));
+ I(4); I(5); I(6); I(7);
+ R(a, b, c, d, e, f, g, h, K[72], L(72));
+ R(h, a, b, c, d, e, f, g, K[73], L(73));
+ R(g, h, a, b, c, d, e, f, K[74], L(74));
+ R(f, g, h, a, b, c, d, e, K[75], L(75));
+ I(8); I(9); I(10); I(11);
+ R(e, f, g, h, a, b, c, d, K[76], L(76));
+ R(d, e, f, g, h, a, b, c, K[77], L(77));
+ R(c, d, e, f, g, h, a, b, K[78], L(78));
+ R(b, c, d, e, f, g, h, a, K[79], L(79));
+ I(12); I(13); I(14); I(15);
+ data += 128;
+
+ h0 += a;
+ h1 += b;
+ h2 += c;
+ h3 += d;
+ h4 += e;
+ h5 += f;
+ h6 += g;
+ h7 += h;
+ a = h0;
+ b = h1;
+ c = h2;
+ d = h3;
+ e = h4;
+ f = h5;
+ g = h6;
+ h = h7;
+
+ R(a, b, c, d, e, f, g, h, K[0], W(0));
+ R(h, a, b, c, d, e, f, g, K[1], W(1));
+ R(g, h, a, b, c, d, e, f, K[2], W(2));
+ R(f, g, h, a, b, c, d, e, K[3], W(3));
+ R(e, f, g, h, a, b, c, d, K[4], W(4));
+ R(d, e, f, g, h, a, b, c, K[5], W(5));
+ R(c, d, e, f, g, h, a, b, K[6], W(6));
+ R(b, c, d, e, f, g, h, a, K[7], W(7));
+ R(a, b, c, d, e, f, g, h, K[8], W(8));
+ R(h, a, b, c, d, e, f, g, K[9], W(9));
+ R(g, h, a, b, c, d, e, f, K[10], W(10));
+ R(f, g, h, a, b, c, d, e, K[11], W(11));
+ R(e, f, g, h, a, b, c, d, K[12], W(12));
+ R(d, e, f, g, h, a, b, c, K[13], W(13));
+ R(c, d, e, f, g, h, a, b, K[14], W(14));
+ R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], W(48));
+ R(h, a, b, c, d, e, f, g, K[49], W(49));
+ R(g, h, a, b, c, d, e, f, K[50], W(50));
+ R(f, g, h, a, b, c, d, e, K[51], W(51));
+ R(e, f, g, h, a, b, c, d, K[52], W(52));
+ R(d, e, f, g, h, a, b, c, K[53], W(53));
+ R(c, d, e, f, g, h, a, b, K[54], W(54));
+ R(b, c, d, e, f, g, h, a, K[55], W(55));
+ R(a, b, c, d, e, f, g, h, K[56], W(56));
+ R(h, a, b, c, d, e, f, g, K[57], W(57));
+ R(g, h, a, b, c, d, e, f, K[58], W(58));
+ R(f, g, h, a, b, c, d, e, K[59], W(59));
+ R(e, f, g, h, a, b, c, d, K[60], W(60));
+ R(d, e, f, g, h, a, b, c, K[61], W(61));
+ R(c, d, e, f, g, h, a, b, K[62], W(62));
+ R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+ R(a, b, c, d, e, f, g, h, K[64], L(64));
+ R(h, a, b, c, d, e, f, g, K[65], L(65));
+ R(g, h, a, b, c, d, e, f, K[66], L(66));
+ R(f, g, h, a, b, c, d, e, K[67], L(67));
+ R(e, f, g, h, a, b, c, d, K[68], L(68));
+ R(d, e, f, g, h, a, b, c, K[69], L(69));
+ R(c, d, e, f, g, h, a, b, K[70], L(70));
+ R(b, c, d, e, f, g, h, a, K[71], L(71));
+ R(a, b, c, d, e, f, g, h, K[72], L(72));
+ R(h, a, b, c, d, e, f, g, K[73], L(73));
+ R(g, h, a, b, c, d, e, f, K[74], L(74));
+ R(f, g, h, a, b, c, d, e, K[75], L(75));
+ R(e, f, g, h, a, b, c, d, K[76], L(76));
+ R(d, e, f, g, h, a, b, c, K[77], L(77));
+ R(c, d, e, f, g, h, a, b, K[78], L(78));
+ R(b, c, d, e, f, g, h, a, K[79], L(79));
+
+ h0 += a;
+ h1 += b;
+ h2 += c;
+ h3 += d;
+ h4 += e;
+ h5 += f;
+ h6 += g;
+ h7 += h;
+
+ nblks -= 2;
+ }
+
+ while (nblks)
+ {
+ a = h0;
+ b = h1;
+ c = h2;
+ d = h3;
+ e = h4;
+ f = h5;
+ g = h6;
+ h = h7;
+
+ I(0); I(1); I(2); I(3);
+ I(4); I(5); I(6); I(7);
+ I(8); I(9); I(10); I(11);
+ I(12); I(13); I(14); I(15);
+ data += 128;
+ R(a, b, c, d, e, f, g, h, K[0], W(0));
+ R(h, a, b, c, d, e, f, g, K[1], W(1));
+ R(g, h, a, b, c, d, e, f, K[2], W(2));
+ R(f, g, h, a, b, c, d, e, K[3], W(3));
+ R(e, f, g, h, a, b, c, d, K[4], W(4));
+ R(d, e, f, g, h, a, b, c, K[5], W(5));
+ R(c, d, e, f, g, h, a, b, K[6], W(6));
+ R(b, c, d, e, f, g, h, a, K[7], W(7));
+ R(a, b, c, d, e, f, g, h, K[8], W(8));
+ R(h, a, b, c, d, e, f, g, K[9], W(9));
+ R(g, h, a, b, c, d, e, f, K[10], W(10));
+ R(f, g, h, a, b, c, d, e, K[11], W(11));
+ R(e, f, g, h, a, b, c, d, K[12], W(12));
+ R(d, e, f, g, h, a, b, c, K[13], W(13));
+ R(c, d, e, f, g, h, a, b, K[14], W(14));
+ R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], W(48));
+ R(h, a, b, c, d, e, f, g, K[49], W(49));
+ R(g, h, a, b, c, d, e, f, K[50], W(50));
+ R(f, g, h, a, b, c, d, e, K[51], W(51));
+ R(e, f, g, h, a, b, c, d, K[52], W(52));
+ R(d, e, f, g, h, a, b, c, K[53], W(53));
+ R(c, d, e, f, g, h, a, b, K[54], W(54));
+ R(b, c, d, e, f, g, h, a, K[55], W(55));
+ R(a, b, c, d, e, f, g, h, K[56], W(56));
+ R(h, a, b, c, d, e, f, g, K[57], W(57));
+ R(g, h, a, b, c, d, e, f, K[58], W(58));
+ R(f, g, h, a, b, c, d, e, K[59], W(59));
+ R(e, f, g, h, a, b, c, d, K[60], W(60));
+ R(d, e, f, g, h, a, b, c, K[61], W(61));
+ R(c, d, e, f, g, h, a, b, K[62], W(62));
+ R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+ R(a, b, c, d, e, f, g, h, K[64], L(64));
+ R(h, a, b, c, d, e, f, g, K[65], L(65));
+ R(g, h, a, b, c, d, e, f, K[66], L(66));
+ R(f, g, h, a, b, c, d, e, K[67], L(67));
+ R(e, f, g, h, a, b, c, d, K[68], L(68));
+ R(d, e, f, g, h, a, b, c, K[69], L(69));
+ R(c, d, e, f, g, h, a, b, K[70], L(70));
+ R(b, c, d, e, f, g, h, a, K[71], L(71));
+ R(a, b, c, d, e, f, g, h, K[72], L(72));
+ R(h, a, b, c, d, e, f, g, K[73], L(73));
+ R(g, h, a, b, c, d, e, f, K[74], L(74));
+ R(f, g, h, a, b, c, d, e, K[75], L(75));
+ R(e, f, g, h, a, b, c, d, K[76], L(76));
+ R(d, e, f, g, h, a, b, c, K[77], L(77));
+ R(c, d, e, f, g, h, a, b, K[78], L(78));
+ R(b, c, d, e, f, g, h, a, K[79], L(79));
+
+ h0 += a;
+ h1 += b;
+ h2 += c;
+ h3 += d;
+ h4 += e;
+ h5 += f;
+ h6 += g;
+ h7 += h;
+
+ nblks--;
+ }
+
+ h0 = vec_merge_idx0_elems (h0, h1);
+ h2 = vec_merge_idx0_elems (h2, h3);
+ h4 = vec_merge_idx0_elems (h4, h5);
+ h6 = vec_merge_idx0_elems (h6, h7);
+ vec_u64_store (h0, 8 * 0, (unsigned long long *)state);
+ vec_u64_store (h2, 8 * 2, (unsigned long long *)state);
+ vec_u64_store (h4, 8 * 4, (unsigned long long *)state);
+ vec_u64_store (h6, 8 * 6, (unsigned long long *)state);
+
+ return sizeof(w);
+}
+#undef R
+#undef Cho
+#undef Maj
+#undef Sum0
+#undef Sum1
+#undef S0
+#undef S1
+#undef I
+#undef W
+#undef I2
+#undef W2
+#undef R2
+
+
+/* SHA2 round in general purpose registers */
+#define R(a,b,c,d,e,f,g,h,k,w) do \
+ { \
+ t1 = (h) + Sum1((e)) + Cho((e),(f),(g)) + ((k) + (w));\
+ t2 = Sum0((a)) + Maj((a),(b),(c)); \
+ d += t1; \
+ h = t1 + t2; \
+ } while (0)
+
+#define Cho(x, y, z) ((x & y) + (~x & z))
+
+#define Maj(z, x, y) ((x & y) + (z & (x ^ y)))
+
+#define Sum0(x) (ror64(x, 28) ^ ror64(x ^ ror64(x, 39-34), 34))
+
+#define Sum1(x) (ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41))
+
+
+/* Message expansion on general purpose registers */
+#define S0(x) (ror64 ((x), 1) ^ ror64 ((x), 8) ^ ((x) >> 7))
+#define S1(x) (ror64 ((x), 19) ^ ror64 ((x), 61) ^ ((x) >> 6))
+
+#define I(i) ( w[i] = buf_get_be64(data + i * 8) )
+#define WN(i) ({ w[i&0x0f] += w[(i-7) &0x0f]; \
+ w[i&0x0f] += S0(w[(i-15)&0x0f]); \
+ w[i&0x0f] += S1(w[(i-2) &0x0f]); \
+ w[i&0x0f]; })
+#define W(i) ({ u64 r = w[i&0x0f]; WN(i); r; })
+#define L(i) w[i&0x0f]
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_sha512_transform_ppc9(u64 state[8], const unsigned char *data,
+ size_t nblks)
+{
+  /* GPRs are used for the round function and message expansion, as
+   * vector-intrinsics-based code generates slower code for POWER9. */
+ u64 a, b, c, d, e, f, g, h, t1, t2;
+ u64 w[16];
+
+ a = state[0];
+ b = state[1];
+ c = state[2];
+ d = state[3];
+ e = state[4];
+ f = state[5];
+ g = state[6];
+ h = state[7];
+
+ while (nblks >= 2)
+ {
+ I(0); I(1); I(2); I(3);
+ I(4); I(5); I(6); I(7);
+ I(8); I(9); I(10); I(11);
+ I(12); I(13); I(14); I(15);
+ data += 128;
+ R(a, b, c, d, e, f, g, h, K[0], W(0));
+ R(h, a, b, c, d, e, f, g, K[1], W(1));
+ R(g, h, a, b, c, d, e, f, K[2], W(2));
+ R(f, g, h, a, b, c, d, e, K[3], W(3));
+ R(e, f, g, h, a, b, c, d, K[4], W(4));
+ R(d, e, f, g, h, a, b, c, K[5], W(5));
+ R(c, d, e, f, g, h, a, b, K[6], W(6));
+ R(b, c, d, e, f, g, h, a, K[7], W(7));
+ R(a, b, c, d, e, f, g, h, K[8], W(8));
+ R(h, a, b, c, d, e, f, g, K[9], W(9));
+ R(g, h, a, b, c, d, e, f, K[10], W(10));
+ R(f, g, h, a, b, c, d, e, K[11], W(11));
+ R(e, f, g, h, a, b, c, d, K[12], W(12));
+ R(d, e, f, g, h, a, b, c, K[13], W(13));
+ R(c, d, e, f, g, h, a, b, K[14], W(14));
+ R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], W(48));
+ R(h, a, b, c, d, e, f, g, K[49], W(49));
+ R(g, h, a, b, c, d, e, f, K[50], W(50));
+ R(f, g, h, a, b, c, d, e, K[51], W(51));
+ R(e, f, g, h, a, b, c, d, K[52], W(52));
+ R(d, e, f, g, h, a, b, c, K[53], W(53));
+ R(c, d, e, f, g, h, a, b, K[54], W(54));
+ R(b, c, d, e, f, g, h, a, K[55], W(55));
+ R(a, b, c, d, e, f, g, h, K[56], W(56));
+ R(h, a, b, c, d, e, f, g, K[57], W(57));
+ R(g, h, a, b, c, d, e, f, K[58], W(58));
+ R(f, g, h, a, b, c, d, e, K[59], W(59));
+ R(e, f, g, h, a, b, c, d, K[60], W(60));
+ R(d, e, f, g, h, a, b, c, K[61], W(61));
+ R(c, d, e, f, g, h, a, b, K[62], W(62));
+ R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+ R(a, b, c, d, e, f, g, h, K[64], L(64));
+ R(h, a, b, c, d, e, f, g, K[65], L(65));
+ R(g, h, a, b, c, d, e, f, K[66], L(66));
+ R(f, g, h, a, b, c, d, e, K[67], L(67));
+ I(0); I(1); I(2); I(3);
+ R(e, f, g, h, a, b, c, d, K[68], L(68));
+ R(d, e, f, g, h, a, b, c, K[69], L(69));
+ R(c, d, e, f, g, h, a, b, K[70], L(70));
+ R(b, c, d, e, f, g, h, a, K[71], L(71));
+ I(4); I(5); I(6); I(7);
+ R(a, b, c, d, e, f, g, h, K[72], L(72));
+ R(h, a, b, c, d, e, f, g, K[73], L(73));
+ R(g, h, a, b, c, d, e, f, K[74], L(74));
+ R(f, g, h, a, b, c, d, e, K[75], L(75));
+ I(8); I(9); I(10); I(11);
+ R(e, f, g, h, a, b, c, d, K[76], L(76));
+ R(d, e, f, g, h, a, b, c, K[77], L(77));
+ R(c, d, e, f, g, h, a, b, K[78], L(78));
+ R(b, c, d, e, f, g, h, a, K[79], L(79));
+ I(12); I(13); I(14); I(15);
+ data += 128;
+
+ a += state[0];
+ b += state[1];
+ c += state[2];
+ d += state[3];
+ e += state[4];
+ f += state[5];
+ g += state[6];
+ h += state[7];
+ state[0] = a;
+ state[1] = b;
+ state[2] = c;
+ state[3] = d;
+ state[4] = e;
+ state[5] = f;
+ state[6] = g;
+ state[7] = h;
+
+ R(a, b, c, d, e, f, g, h, K[0], W(0));
+ R(h, a, b, c, d, e, f, g, K[1], W(1));
+ R(g, h, a, b, c, d, e, f, K[2], W(2));
+ R(f, g, h, a, b, c, d, e, K[3], W(3));
+ R(e, f, g, h, a, b, c, d, K[4], W(4));
+ R(d, e, f, g, h, a, b, c, K[5], W(5));
+ R(c, d, e, f, g, h, a, b, K[6], W(6));
+ R(b, c, d, e, f, g, h, a, K[7], W(7));
+ R(a, b, c, d, e, f, g, h, K[8], W(8));
+ R(h, a, b, c, d, e, f, g, K[9], W(9));
+ R(g, h, a, b, c, d, e, f, K[10], W(10));
+ R(f, g, h, a, b, c, d, e, K[11], W(11));
+ R(e, f, g, h, a, b, c, d, K[12], W(12));
+ R(d, e, f, g, h, a, b, c, K[13], W(13));
+ R(c, d, e, f, g, h, a, b, K[14], W(14));
+ R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], W(48));
+ R(h, a, b, c, d, e, f, g, K[49], W(49));
+ R(g, h, a, b, c, d, e, f, K[50], W(50));
+ R(f, g, h, a, b, c, d, e, K[51], W(51));
+ R(e, f, g, h, a, b, c, d, K[52], W(52));
+ R(d, e, f, g, h, a, b, c, K[53], W(53));
+ R(c, d, e, f, g, h, a, b, K[54], W(54));
+ R(b, c, d, e, f, g, h, a, K[55], W(55));
+ R(a, b, c, d, e, f, g, h, K[56], W(56));
+ R(h, a, b, c, d, e, f, g, K[57], W(57));
+ R(g, h, a, b, c, d, e, f, K[58], W(58));
+ R(f, g, h, a, b, c, d, e, K[59], W(59));
+ R(e, f, g, h, a, b, c, d, K[60], W(60));
+ R(d, e, f, g, h, a, b, c, K[61], W(61));
+ R(c, d, e, f, g, h, a, b, K[62], W(62));
+ R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+ R(a, b, c, d, e, f, g, h, K[64], L(64));
+ R(h, a, b, c, d, e, f, g, K[65], L(65));
+ R(g, h, a, b, c, d, e, f, K[66], L(66));
+ R(f, g, h, a, b, c, d, e, K[67], L(67));
+ R(e, f, g, h, a, b, c, d, K[68], L(68));
+ R(d, e, f, g, h, a, b, c, K[69], L(69));
+ R(c, d, e, f, g, h, a, b, K[70], L(70));
+ R(b, c, d, e, f, g, h, a, K[71], L(71));
+ R(a, b, c, d, e, f, g, h, K[72], L(72));
+ R(h, a, b, c, d, e, f, g, K[73], L(73));
+ R(g, h, a, b, c, d, e, f, K[74], L(74));
+ R(f, g, h, a, b, c, d, e, K[75], L(75));
+ R(e, f, g, h, a, b, c, d, K[76], L(76));
+ R(d, e, f, g, h, a, b, c, K[77], L(77));
+ R(c, d, e, f, g, h, a, b, K[78], L(78));
+ R(b, c, d, e, f, g, h, a, K[79], L(79));
+
+ a += state[0];
+ b += state[1];
+ c += state[2];
+ d += state[3];
+ e += state[4];
+ f += state[5];
+ g += state[6];
+ h += state[7];
+ state[0] = a;
+ state[1] = b;
+ state[2] = c;
+ state[3] = d;
+ state[4] = e;
+ state[5] = f;
+ state[6] = g;
+ state[7] = h;
+
+ nblks -= 2;
+ }
+
+ while (nblks)
+ {
+ I(0); I(1); I(2); I(3);
+ I(4); I(5); I(6); I(7);
+ I(8); I(9); I(10); I(11);
+ I(12); I(13); I(14); I(15);
+ data += 128;
+ R(a, b, c, d, e, f, g, h, K[0], W(0));
+ R(h, a, b, c, d, e, f, g, K[1], W(1));
+ R(g, h, a, b, c, d, e, f, K[2], W(2));
+ R(f, g, h, a, b, c, d, e, K[3], W(3));
+ R(e, f, g, h, a, b, c, d, K[4], W(4));
+ R(d, e, f, g, h, a, b, c, K[5], W(5));
+ R(c, d, e, f, g, h, a, b, K[6], W(6));
+ R(b, c, d, e, f, g, h, a, K[7], W(7));
+ R(a, b, c, d, e, f, g, h, K[8], W(8));
+ R(h, a, b, c, d, e, f, g, K[9], W(9));
+ R(g, h, a, b, c, d, e, f, K[10], W(10));
+ R(f, g, h, a, b, c, d, e, K[11], W(11));
+ R(e, f, g, h, a, b, c, d, K[12], W(12));
+ R(d, e, f, g, h, a, b, c, K[13], W(13));
+ R(c, d, e, f, g, h, a, b, K[14], W(14));
+ R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], W(48));
+ R(h, a, b, c, d, e, f, g, K[49], W(49));
+ R(g, h, a, b, c, d, e, f, K[50], W(50));
+ R(f, g, h, a, b, c, d, e, K[51], W(51));
+ R(e, f, g, h, a, b, c, d, K[52], W(52));
+ R(d, e, f, g, h, a, b, c, K[53], W(53));
+ R(c, d, e, f, g, h, a, b, K[54], W(54));
+ R(b, c, d, e, f, g, h, a, K[55], W(55));
+ R(a, b, c, d, e, f, g, h, K[56], W(56));
+ R(h, a, b, c, d, e, f, g, K[57], W(57));
+ R(g, h, a, b, c, d, e, f, K[58], W(58));
+ R(f, g, h, a, b, c, d, e, K[59], W(59));
+ R(e, f, g, h, a, b, c, d, K[60], W(60));
+ R(d, e, f, g, h, a, b, c, K[61], W(61));
+ R(c, d, e, f, g, h, a, b, K[62], W(62));
+ R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+ R(a, b, c, d, e, f, g, h, K[64], L(64));
+ R(h, a, b, c, d, e, f, g, K[65], L(65));
+ R(g, h, a, b, c, d, e, f, K[66], L(66));
+ R(f, g, h, a, b, c, d, e, K[67], L(67));
+ R(e, f, g, h, a, b, c, d, K[68], L(68));
+ R(d, e, f, g, h, a, b, c, K[69], L(69));
+ R(c, d, e, f, g, h, a, b, K[70], L(70));
+ R(b, c, d, e, f, g, h, a, K[71], L(71));
+ R(a, b, c, d, e, f, g, h, K[72], L(72));
+ R(h, a, b, c, d, e, f, g, K[73], L(73));
+ R(g, h, a, b, c, d, e, f, K[74], L(74));
+ R(f, g, h, a, b, c, d, e, K[75], L(75));
+ R(e, f, g, h, a, b, c, d, K[76], L(76));
+ R(d, e, f, g, h, a, b, c, K[77], L(77));
+ R(c, d, e, f, g, h, a, b, K[78], L(78));
+ R(b, c, d, e, f, g, h, a, K[79], L(79));
+
+ a += state[0];
+ b += state[1];
+ c += state[2];
+ d += state[3];
+ e += state[4];
+ f += state[5];
+ g += state[6];
+ h += state[7];
+ state[0] = a;
+ state[1] = b;
+ state[2] = c;
+ state[3] = d;
+ state[4] = e;
+ state[5] = f;
+ state[6] = g;
+ state[7] = h;
+
+ nblks--;
+ }
+
+ return sizeof(w);
+}
+
+#endif /* ENABLE_PPC_CRYPTO_SUPPORT */
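
In both PPC transform functions above, the I/WN/W/L macros keep only a 16-entry rolling window of the message schedule: W(t) returns the word consumed by round t and immediately overwrites its slot with the word needed 16 rounds later, while L(t) only reads a slot, which is why rounds 64..79 use L() and can be interleaved with the I() loads for the next block. A small C model of one window step (illustrative only; names are not from the patch):

#include <stdint.h>

static inline uint64_t ror64_sketch(uint64_t x, unsigned n)
{
  return (x >> n) | (x << (64 - n));
}

/* Return W[t] from the 16-slot window and replace its slot with W[t+16]. */
static uint64_t schedule_word_sketch(uint64_t w[16], unsigned t)
{
  uint64_t r  = w[t & 15];                     /* word consumed by round t */
  uint64_t s0 = ror64_sketch(w[(t - 15) & 15], 1)
                ^ ror64_sketch(w[(t - 15) & 15], 8)
                ^ (w[(t - 15) & 15] >> 7);     /* sigma0 term */
  uint64_t s1 = ror64_sketch(w[(t - 2) & 15], 19)
                ^ ror64_sketch(w[(t - 2) & 15], 61)
                ^ (w[(t - 2) & 15] >> 6);      /* sigma1 term */
  w[t & 15] = r + w[(t - 7) & 15] + s0 + s1;   /* becomes W[t+16] */
  return r;
}

The POWER8 path keeps the working state in VSX registers and maps Sum0/Sum1 onto the vshasigmad instruction through vec_vshasigma_u64, so only this schedule window lives in GPRs; the POWER9 path keeps everything in GPRs, as its comment notes.
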
diff --git a/comm/third_party/libgcrypt/cipher/sha512-ssse3-amd64.S b/comm/third_party/libgcrypt/cipher/sha512-ssse3-amd64.S
new file mode 100644
index 0000000000..6a1328a690
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha512-ssse3-amd64.S
@@ -0,0 +1,467 @@
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright (c) 2012, Intel Corporation
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the
+; distribution.
+;
+; * Neither the name of the Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived from
+; this software without specific prior written permission.
+;
+;
+; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ * by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Note: the original implementation was named SHA512-SSE4. However, only
+ * SSSE3 is required.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA512)
+
+#include "asm-common-amd64.h"
+
+.intel_syntax noprefix
+
+.text
+
+/* Virtual Registers */
+#define msg rdi /* ARG1 */
+#define digest rsi /* ARG2 */
+#define msglen rdx /* ARG3 */
+#define T1 rcx
+#define T2 r8
+#define a_64 r9
+#define b_64 r10
+#define c_64 r11
+#define d_64 r12
+#define e_64 r13
+#define f_64 r14
+#define g_64 r15
+#define h_64 rbx
+#define tmp0 rax
+
+/*
+; Local variables (stack frame)
+; Note: frame_size must be an odd multiple of 8 bytes to XMM align RSP
+*/
+#define frame_W 0 /* Message Schedule */
+#define frame_W_size (80 * 8)
+#define frame_WK ((frame_W) + (frame_W_size)) /* W[t] + K[t] | W[t+1] + K[t+1] */
+#define frame_WK_size (2 * 8)
+#define frame_GPRSAVE ((frame_WK) + (frame_WK_size))
+#define frame_GPRSAVE_size (5 * 8)
+#define frame_size ((frame_GPRSAVE) + (frame_GPRSAVE_size))
+
+
+/* Useful QWORD "arrays" for simpler memory references */
+#define MSG(i) msg + 8*(i) /* Input message (arg1) */
+#define DIGEST(i) digest + 8*(i) /* Output Digest (arg2) */
+#define K_t(i) .LK512 + 8*(i) ADD_RIP /* SHA Constants (static mem) */
+#define W_t(i) rsp + frame_W + 8*(i) /* Message Schedule (stack frame) */
+#define WK_2(i) rsp + frame_WK + 8*((i) % 2) /* W[t]+K[t] (stack frame) */
+/* MSG, DIGEST, K_t, W_t are arrays */
+/* WK_2(t) points to 1 of 2 qwords at frame.WK depending on t being odd/even */
+
+#define SHA512_Round(t, a, b, c, d, e, f, g, h) \
+ /* Compute Round %%t */; \
+ mov T1, f /* T1 = f */; \
+ mov tmp0, e /* tmp = e */; \
+ xor T1, g /* T1 = f ^ g */; \
+ ror tmp0, 23 /* 41 ; tmp = e ror 23 */; \
+ and T1, e /* T1 = (f ^ g) & e */; \
+ xor tmp0, e /* tmp = (e ror 23) ^ e */; \
+ xor T1, g /* T1 = ((f ^ g) & e) ^ g = CH(e,f,g) */; \
+ add T1, [WK_2(t)] /* W[t] + K[t] from message scheduler */; \
+ ror tmp0, 4 /* 18 ; tmp = ((e ror 23) ^ e) ror 4 */; \
+ xor tmp0, e /* tmp = (((e ror 23) ^ e) ror 4) ^ e */; \
+ mov T2, a /* T2 = a */; \
+ add T1, h /* T1 = CH(e,f,g) + W[t] + K[t] + h */; \
+ ror tmp0, 14 /* 14 ; tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e) */; \
+ add T1, tmp0 /* T1 = CH(e,f,g) + W[t] + K[t] + S1(e) */; \
+ mov tmp0, a /* tmp = a */; \
+ xor T2, c /* T2 = a ^ c */; \
+ and tmp0, c /* tmp = a & c */; \
+ and T2, b /* T2 = (a ^ c) & b */; \
+ xor T2, tmp0 /* T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c) */; \
+ mov tmp0, a /* tmp = a */; \
+ ror tmp0, 5 /* 39 ; tmp = a ror 5 */; \
+ xor tmp0, a /* tmp = (a ror 5) ^ a */; \
+ add d, T1 /* e(next_state) = d + T1 */; \
+ ror tmp0, 6 /* 34 ; tmp = ((a ror 5) ^ a) ror 6 */; \
+ xor tmp0, a /* tmp = (((a ror 5) ^ a) ror 6) ^ a */; \
+ lea h, [T1 + T2] /* a(next_state) = T1 + Maj(a,b,c) */; \
+ ror tmp0, 28 /* 28 ; tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) */; \
+	add	h, tmp0	/* a(next_state) = T1 + Maj(a,b,c) + S0(a) */
+
+#define SHA512_2Sched_2Round_sse_PART1(t, a, b, c, d, e, f, g, h) \
+ /* \
+ ; Compute rounds %%t-2 and %%t-1 \
+ ; Compute message schedule QWORDS %%t and %%t+1 \
+ ; \
+ ; Two rounds are computed based on the values for K[t-2]+W[t-2] and \
+ ; K[t-1]+W[t-1] which were previously stored at WK_2 by the message \
+ ; scheduler. \
+ ; The two new schedule QWORDS are stored at [W_t(%%t)] and [W_t(%%t+1)]. \
+ ; They are then added to their respective SHA512 constants at \
+ ; [K_t(%%t)] and [K_t(%%t+1)] and stored at dqword [WK_2(%%t)] \
+	; For brevity, the comments following vectored instructions only refer to \
+	; the first of a pair of QWORDS. \
+	; E.g. XMM2=W[t-2] really means XMM2={W[t-2]|W[t-1]} \
+ ; The computation of the message schedule and the rounds are tightly \
+ ; stitched to take advantage of instruction-level parallelism. \
+ ; For clarity, integer instructions (for the rounds calculation) are indented \
+ ; by one tab. Vectored instructions (for the message scheduler) are indented \
+ ; by two tabs. \
+ */ \
+ \
+ mov T1, f; \
+ movdqa xmm2, [W_t(t-2)] /* XMM2 = W[t-2] */; \
+ xor T1, g; \
+ and T1, e; \
+ movdqa xmm0, xmm2 /* XMM0 = W[t-2] */; \
+ xor T1, g; \
+ add T1, [WK_2(t)]; \
+ movdqu xmm5, [W_t(t-15)] /* XMM5 = W[t-15] */; \
+ mov tmp0, e; \
+ ror tmp0, 23 /* 41 */; \
+ movdqa xmm3, xmm5 /* XMM3 = W[t-15] */; \
+ xor tmp0, e; \
+ ror tmp0, 4 /* 18 */; \
+ psrlq xmm0, 61 - 19 /* XMM0 = W[t-2] >> 42 */; \
+ xor tmp0, e; \
+ ror tmp0, 14 /* 14 */; \
+ psrlq xmm3, (8 - 7) /* XMM3 = W[t-15] >> 1 */; \
+ add T1, tmp0; \
+ add T1, h; \
+ pxor xmm0, xmm2 /* XMM0 = (W[t-2] >> 42) ^ W[t-2] */; \
+ mov T2, a; \
+ xor T2, c; \
+ pxor xmm3, xmm5 /* XMM3 = (W[t-15] >> 1) ^ W[t-15] */; \
+ and T2, b; \
+ mov tmp0, a; \
+ psrlq xmm0, 19 - 6 /* XMM0 = ((W[t-2]>>42)^W[t-2])>>13 */; \
+ and tmp0, c; \
+ xor T2, tmp0; \
+ psrlq xmm3, (7 - 1) /* XMM3 = ((W[t-15]>>1)^W[t-15])>>6 */; \
+ mov tmp0, a; \
+ ror tmp0, 5 /* 39 */; \
+ pxor xmm0, xmm2 /* XMM0 = (((W[t-2]>>42)^W[t-2])>>13)^W[t-2] */; \
+ xor tmp0, a; \
+ ror tmp0, 6 /* 34 */; \
+ pxor xmm3, xmm5 /* XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15] */; \
+ xor tmp0, a; \
+ ror tmp0, 28 /* 28 */; \
+ psrlq xmm0, 6 /* XMM0 = ((((W[t-2]>>42)^W[t-2])>>13)^W[t-2])>>6 */; \
+ add T2, tmp0; \
+ add d, T1; \
+	psrlq	xmm3, 1                 /* XMM3 = ((((W[t-15]>>1)^W[t-15])>>6)^W[t-15])>>1 */; \
+ lea h, [T1 + T2]
+
+#define SHA512_2Sched_2Round_sse_PART2(t, a, b, c, d, e, f, g, h) \
+ movdqa xmm1, xmm2 /* XMM1 = W[t-2] */; \
+ mov T1, f; \
+ xor T1, g; \
+ movdqa xmm4, xmm5 /* XMM4 = W[t-15] */; \
+ and T1, e; \
+ xor T1, g; \
+ psllq xmm1, (64 - 19) - (64 - 61) /* XMM1 = W[t-2] << 42 */; \
+ add T1, [WK_2(t+1)]; \
+ mov tmp0, e; \
+ psllq xmm4, (64 - 1) - (64 - 8) /* XMM4 = W[t-15] << 7 */; \
+ ror tmp0, 23 /* 41 */; \
+ xor tmp0, e; \
+ pxor xmm1, xmm2 /* XMM1 = (W[t-2] << 42)^W[t-2] */; \
+ ror tmp0, 4 /* 18 */; \
+ xor tmp0, e; \
+ pxor xmm4, xmm5 /* XMM4 = (W[t-15]<<7)^W[t-15] */; \
+ ror tmp0, 14 /* 14 */; \
+ add T1, tmp0; \
+ psllq xmm1, (64 - 61) /* XMM1 = ((W[t-2] << 42)^W[t-2])<<3 */; \
+ add T1, h; \
+ mov T2, a; \
+ psllq xmm4, (64 - 8) /* XMM4 = ((W[t-15]<<7)^W[t-15])<<56 */; \
+ xor T2, c; \
+ and T2, b; \
+ pxor xmm0, xmm1 /* XMM0 = s1(W[t-2]) */; \
+ mov tmp0, a; \
+ and tmp0, c; \
+ movdqu xmm1, [W_t(t- 7)] /* XMM1 = W[t-7] */; \
+ xor T2, tmp0; \
+ pxor xmm3, xmm4 /* XMM3 = s0(W[t-15]) */; \
+ mov tmp0, a; \
+ paddq xmm0, xmm3 /* XMM0 = s1(W[t-2]) + s0(W[t-15]) */; \
+ ror tmp0, 5 /* 39 */; \
+ paddq xmm0, [W_t(t-16)] /* XMM0 = s1(W[t-2]) + s0(W[t-15]) + W[t-16] */; \
+ xor tmp0, a; \
+ paddq xmm0, xmm1 /* XMM0 = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16] */; \
+ ror tmp0, 6 /* 34 */; \
+ movdqa [W_t(t)], xmm0 /* Store scheduled qwords */; \
+ xor tmp0, a; \
+ paddq xmm0, [K_t(t)] /* Compute W[t]+K[t] */; \
+ ror tmp0, 28 /* 28 */; \
+ movdqa [WK_2(t)], xmm0 /* Store W[t]+K[t] for next rounds */; \
+ add T2, tmp0; \
+ add d, T1; \
+ lea h, [T1 + T2]
+
+#define SHA512_2Sched_2Round_sse(t, a, b, c, d, e, f, g, h) \
+ SHA512_2Sched_2Round_sse_PART1(t, a, b, c, d, e, f, g, h); \
+ SHA512_2Sched_2Round_sse_PART2(t, h, a, b, c, d, e, f, g)
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; void sha512_sse4(const void* M, void* D, uint64_t L);
+; Purpose: Updates the SHA512 digest stored at D with the message stored in M.
+; The size of the message pointed to by M must be an integer multiple of the
+; SHA512 message block size (128 bytes).
+; L is the message length in SHA512 blocks.
+*/
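+/*
+; libgcrypt calls the entry point below from C (see sha512.c) as
+;   unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
+;                                                   void *state,
+;                                                   size_t num_blks);
+; and uses the returned value as its stack-burn estimate.
+*/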
+.globl _gcry_sha512_transform_amd64_ssse3
+ELF(.type _gcry_sha512_transform_amd64_ssse3,@function;)
+.align 16
+_gcry_sha512_transform_amd64_ssse3:
+ CFI_STARTPROC()
+ xor eax, eax
+
+ cmp msglen, 0
+ je .Lnowork
+
+ /* Allocate Stack Space */
+ sub rsp, frame_size
+ CFI_ADJUST_CFA_OFFSET(frame_size);
+
+ /* Save GPRs */
+ mov [rsp + frame_GPRSAVE + 8 * 0], rbx
+ mov [rsp + frame_GPRSAVE + 8 * 1], r12
+ mov [rsp + frame_GPRSAVE + 8 * 2], r13
+ mov [rsp + frame_GPRSAVE + 8 * 3], r14
+ mov [rsp + frame_GPRSAVE + 8 * 4], r15
+ CFI_REL_OFFSET(rbx, frame_GPRSAVE + 8 * 0);
+ CFI_REL_OFFSET(r12, frame_GPRSAVE + 8 * 1);
+ CFI_REL_OFFSET(r13, frame_GPRSAVE + 8 * 2);
+ CFI_REL_OFFSET(r14, frame_GPRSAVE + 8 * 3);
+ CFI_REL_OFFSET(r15, frame_GPRSAVE + 8 * 4);
+
+.Lupdateblock:
+
+ /* Load state variables */
+ mov a_64, [DIGEST(0)]
+ mov b_64, [DIGEST(1)]
+ mov c_64, [DIGEST(2)]
+ mov d_64, [DIGEST(3)]
+ mov e_64, [DIGEST(4)]
+ mov f_64, [DIGEST(5)]
+ mov g_64, [DIGEST(6)]
+ mov h_64, [DIGEST(7)]
+
+ /* BSWAP 2 QWORDS */
+ movdqa xmm1, [.LXMM_QWORD_BSWAP ADD_RIP]
+ movdqu xmm0, [MSG(0)]
+ pshufb xmm0, xmm1 /* BSWAP */
+ movdqa [W_t(0)], xmm0 /* Store Scheduled Pair */
+ paddq xmm0, [K_t(0)] /* Compute W[t]+K[t] */
+ movdqa [WK_2(0)], xmm0 /* Store into WK for rounds */
+
+ #define T_2_14(t, a, b, c, d, e, f, g, h) \
+ /* BSWAP 2 QWORDS; Compute 2 Rounds */; \
+ movdqu xmm0, [MSG(t)]; \
+ pshufb xmm0, xmm1 /* BSWAP */; \
+ SHA512_Round(((t) - 2), a##_64, b##_64, c##_64, d##_64, \
+ e##_64, f##_64, g##_64, h##_64); \
+ movdqa [W_t(t)], xmm0 /* Store Scheduled Pair */; \
+ paddq xmm0, [K_t(t)] /* Compute W[t]+K[t] */; \
+ SHA512_Round(((t) - 1), h##_64, a##_64, b##_64, c##_64, \
+ d##_64, e##_64, f##_64, g##_64); \
+ movdqa [WK_2(t)], xmm0 /* Store W[t]+K[t] into WK */
+
+ #define T_16_78(t, a, b, c, d, e, f, g, h) \
+ SHA512_2Sched_2Round_sse((t), a##_64, b##_64, c##_64, d##_64, \
+ e##_64, f##_64, g##_64, h##_64)
+
+ #define T_80(t, a, b, c, d, e, f, g, h) \
+ /* Compute 2 Rounds */; \
+ SHA512_Round((t - 2), a##_64, b##_64, c##_64, d##_64, \
+ e##_64, f##_64, g##_64, h##_64); \
+ SHA512_Round((t - 1), h##_64, a##_64, b##_64, c##_64, \
+ d##_64, e##_64, f##_64, g##_64)
+
+ T_2_14(2, a, b, c, d, e, f, g, h)
+ T_2_14(4, g, h, a, b, c, d, e, f)
+ T_2_14(6, e, f, g, h, a, b, c, d)
+ T_2_14(8, c, d, e, f, g, h, a, b)
+ T_2_14(10, a, b, c, d, e, f, g, h)
+ T_2_14(12, g, h, a, b, c, d, e, f)
+ T_2_14(14, e, f, g, h, a, b, c, d)
+ T_16_78(16, c, d, e, f, g, h, a, b)
+ T_16_78(18, a, b, c, d, e, f, g, h)
+ T_16_78(20, g, h, a, b, c, d, e, f)
+ T_16_78(22, e, f, g, h, a, b, c, d)
+ T_16_78(24, c, d, e, f, g, h, a, b)
+ T_16_78(26, a, b, c, d, e, f, g, h)
+ T_16_78(28, g, h, a, b, c, d, e, f)
+ T_16_78(30, e, f, g, h, a, b, c, d)
+ T_16_78(32, c, d, e, f, g, h, a, b)
+ T_16_78(34, a, b, c, d, e, f, g, h)
+ T_16_78(36, g, h, a, b, c, d, e, f)
+ T_16_78(38, e, f, g, h, a, b, c, d)
+ T_16_78(40, c, d, e, f, g, h, a, b)
+ T_16_78(42, a, b, c, d, e, f, g, h)
+ T_16_78(44, g, h, a, b, c, d, e, f)
+ T_16_78(46, e, f, g, h, a, b, c, d)
+ T_16_78(48, c, d, e, f, g, h, a, b)
+ T_16_78(50, a, b, c, d, e, f, g, h)
+ T_16_78(52, g, h, a, b, c, d, e, f)
+ T_16_78(54, e, f, g, h, a, b, c, d)
+ T_16_78(56, c, d, e, f, g, h, a, b)
+ T_16_78(58, a, b, c, d, e, f, g, h)
+ T_16_78(60, g, h, a, b, c, d, e, f)
+ T_16_78(62, e, f, g, h, a, b, c, d)
+ T_16_78(64, c, d, e, f, g, h, a, b)
+ T_16_78(66, a, b, c, d, e, f, g, h)
+ T_16_78(68, g, h, a, b, c, d, e, f)
+ T_16_78(70, e, f, g, h, a, b, c, d)
+ T_16_78(72, c, d, e, f, g, h, a, b)
+ T_16_78(74, a, b, c, d, e, f, g, h)
+ T_16_78(76, g, h, a, b, c, d, e, f)
+ T_16_78(78, e, f, g, h, a, b, c, d)
+ T_80(80, c, d, e, f, g, h, a, b)
+
+ /* Update digest */
+ add [DIGEST(0)], a_64
+ add [DIGEST(1)], b_64
+ add [DIGEST(2)], c_64
+ add [DIGEST(3)], d_64
+ add [DIGEST(4)], e_64
+ add [DIGEST(5)], f_64
+ add [DIGEST(6)], g_64
+ add [DIGEST(7)], h_64
+
+ /* Advance to next message block */
+ add msg, 16*8
+ dec msglen
+ jnz .Lupdateblock
+
+ /* Restore GPRs */
+ mov rbx, [rsp + frame_GPRSAVE + 8 * 0]
+ mov r12, [rsp + frame_GPRSAVE + 8 * 1]
+ mov r13, [rsp + frame_GPRSAVE + 8 * 2]
+ mov r14, [rsp + frame_GPRSAVE + 8 * 3]
+ mov r15, [rsp + frame_GPRSAVE + 8 * 4]
+ CFI_RESTORE(rbx)
+ CFI_RESTORE(r12)
+ CFI_RESTORE(r13)
+ CFI_RESTORE(r14)
+ CFI_RESTORE(r15)
+
+ pxor xmm0, xmm0
+ pxor xmm1, xmm1
+ pxor xmm2, xmm2
+ pxor xmm3, xmm3
+ pxor xmm4, xmm4
+ pxor xmm5, xmm5
+
+ /* Burn stack */
+ mov eax, 0
+.Lerase_stack:
+ movdqu [rsp + rax], xmm0
+ add eax, 16
+ cmp eax, frame_W_size
+ jne .Lerase_stack
+ movdqu [rsp + frame_WK], xmm0
+ xor eax, eax
+
+ /* Restore Stack Pointer */
+ add rsp, frame_size
+ CFI_ADJUST_CFA_OFFSET(-frame_size);
+
+.Lnowork:
+ ret
+ CFI_ENDPROC()
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Binary Data
+*/
+
+.align 16
+
+/* Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. */
+.LXMM_QWORD_BSWAP:
+ .octa 0x08090a0b0c0d0e0f0001020304050607
+
+/* K[t] used in SHA512 hashing */
+.LK512:
+ .quad 0x428a2f98d728ae22,0x7137449123ef65cd
+ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+ .quad 0x3956c25bf348b538,0x59f111f1b605d019
+ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+ .quad 0xd807aa98a3030242,0x12835b0145706fbe
+ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+ .quad 0x9bdc06a725c71235,0xc19bf174cf692694
+ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+ .quad 0x983e5152ee66dfab,0xa831c66d2db43210
+ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+ .quad 0x06ca6351e003826f,0x142929670a0e6e70
+ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+ .quad 0x81c2c92e47edaee6,0x92722c851482353b
+ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+ .quad 0xd192e819d6ef5218,0xd69906245565a910
+ .quad 0xf40e35855771202a,0x106aa07032bbd1b8
+ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+ .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+ .quad 0x90befffa23631e28,0xa4506cebde82bde9
+ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+ .quad 0xca273eceea26619c,0xd186b8c721c0c207
+ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+ .quad 0x113f9804bef90dae,0x1b710b35131c471b
+ .quad 0x28db77f523047d84,0x32caab7b40c72493
+ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+
+#endif
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/sha512-ssse3-i386.c b/comm/third_party/libgcrypt/cipher/sha512-ssse3-i386.c
new file mode 100644
index 0000000000..0fc98d8ed2
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha512-ssse3-i386.c
@@ -0,0 +1,404 @@
+/* sha512-ssse3-i386.c - i386/SSSE3 implementation of SHA-512 transform
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * SHA512 Message Expansion (I2 and W2 macros) based on implementation
+ * from file "sha512-ssse3-amd64.s":
+ ************************************************************************
+ * Copyright (c) 2012, Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of the Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ *
+ * THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ************************************************************************
+ */
+
+#include <config.h>
+
+#if defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4 && \
+ defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA512)
+
+#include "bufhelp.h"
+
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE/MMX instructions between asm blocks. */
+# pragma GCC target("no-sse")
+# pragma GCC target("no-mmx")
+#endif
+#if __clang__
+# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function)
+# pragma clang attribute push (__attribute__((target("no-mmx"))), apply_to = function)
+#endif
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+static const u64 K[80] __attribute__ ((aligned (16))) =
+ {
+ U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
+ U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
+ U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
+ U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
+ U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
+ U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
+ U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
+ U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
+ U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
+ U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
+ U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
+ U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
+ U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
+ U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
+ U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
+ U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
+ U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
+ U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
+ U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
+ U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
+ U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
+ U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
+ U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
+ U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
+ U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
+ U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
+ U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
+ U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
+ U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
+ U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
+ U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
+ U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
+ U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
+ U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
+ U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
+ U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
+ U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
+ U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
+ U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
+ U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
+ };
+
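+/* pshufb mask that byte-swaps each of the two qwords of an XMM register,
+ * converting the big-endian message words to host byte order. */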
+static const unsigned char bshuf_mask[16] __attribute__ ((aligned (16))) =
+ { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 };
+
+
+/* SHA2 round */
+#define RA "%%mm0"
+#define RB "%%mm1"
+#define RC "%%mm2"
+#define RD "%%mm3"
+#define RE "%%mm4"
+#define RF "%%mm5"
+#define RG "%%mm6"
+#define RH "%%mm7"
+
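+/* One SHA-512 round.  The eight state words live in the MMX registers
+ * RA..RH; XMM2..XMM4 temporarily hold the state words that the round
+ * overwrites, and "wk" (WK0 or WK1) supplies W[t]+K[t] from XMM0. */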
+#define Rx(a,b,c,d,e,f,g,h,wk) \
+  asm volatile (/* Ch + Sum1 */ \
+ "movq2dq "a", %%xmm2;\n\t" \
+ "movq "e", "a";\n\t" \
+ "movq2dq "c", %%xmm3;\n\t" \
+ "movq "e", "c";\n\t" \
+ "movq2dq "b", %%xmm4;\n\t" \
+ "movq "e", "b";\n\t" \
+ "psrlq $(41-18), "c";\n\t" \
+ "pandn "g", "a";\n\t" \
+ "pxor "e", "c";\n\t" \
+ "pand "f", "b";\n\t" \
+ "psrlq $(18-14), "c";\n\t" \
+ "paddq "a", "h";\n\t" \
+ wk(a) \
+ "pxor "e", "c";\n\t" \
+ "paddq "b", "h";\n\t" \
+ "psrlq $(14), "c";\n\t" \
+ "movq "e", "b";\n\t" \
+ "psllq $(50-46), "b";\n\t" \
+ "paddq "a", "h";\n\t" \
+ "movdq2q %%xmm2, "a";\n\t" \
+ "pxor "e", "b";\n\t" \
+ "psllq $(46-23), "b";\n\t" \
+ "pxor "e", "b";\n\t" \
+ "psllq $(23), "b";\n\t" \
+ "pxor "b", "c";\n\t" \
+ "movdq2q %%xmm4, "b";\n\t" \
+ "paddq "c", "h";\n\t" \
+ "movdq2q %%xmm3, "c";\n\t" \
+ \
+ /* Maj + Sum0 */ \
+ "movq2dq "e", %%xmm2;\n\t" \
+ "movq "a", "e";\n\t" \
+ "movq2dq "g", %%xmm3;\n\t" \
+ "movq "a", "g";\n\t" \
+ "movq2dq "f", %%xmm4;\n\t" \
+ "movq "a", "f";\n\t" \
+ "psrlq $(39-34), "g";\n\t" \
+ "pxor "b", "e";\n\t" \
+ "pxor "a", "g";\n\t" \
+ "pand "b", "f";\n\t" \
+ "psrlq $(34-28), "g";\n\t" \
+ "pand "c", "e";\n\t" \
+ "pxor "a", "g";\n\t" \
+ "paddq "h", "d";\n\t" \
+ "paddq "f", "h";\n\t" \
+ "movdq2q %%xmm4, "f";\n\t" \
+ "psrlq $28, "g";\n\t" \
+ "paddq "e", "h";\n\t" \
+ "movq "a", "e";\n\t" \
+ "psllq $(36-30), "e";\n\t" \
+ "pxor "a", "e";\n\t" \
+ "psllq $(30-25), "e";\n\t" \
+ "pxor "a", "e";\n\t" \
+ "psllq $(25), "e";\n\t" \
+ "pxor "e", "g";\n\t" \
+ "movdq2q %%xmm2, "e";\n\t" \
+ "paddq "g", "h";\n\t" \
+ "movdq2q %%xmm3, "g";\n\t" \
+ \
+ : \
+ : \
+ : "memory" )
+
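+/* WK0 extracts the low qword of XMM0 (W[t]+K[t]) into an MMX register and
+ * shuffles the high qword down so that the following WK1 round can fetch
+ * W[t+1]+K[t+1] from the same XMM register. */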
+#define WK0(tmp) "movdq2q %%xmm0, "tmp";\n\t" \
+ "pshufd $0xee, %%xmm0, %%xmm0;\n\t"
+
+#define WK1(tmp) "movdq2q %%xmm0, "tmp";\n\t"
+
+/* Message expansion */
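+/* I2 loads and byte-swaps two input qwords (rounds t < 16); W2 derives two
+ * new schedule qwords (rounds t >= 16).  Both store W[t] into w[] and leave
+ * W[t]+K[t] in XMM0 for the two following Rx rounds (WK0, then WK1). */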
+#define I2(i) \
+ asm volatile ("movdqu %[inbuf], %%xmm0;\n\t" \
+ "pshufb %%xmm6, %%xmm0;\n\t" \
+ "movdqu %%xmm0, %[w];\n\t" \
+ "paddq %[k], %%xmm0;\n\t" \
+ : \
+ : [k] "m" (K[i]), \
+ [w] "m" (w[i]), \
+ [inbuf] "m" (data[(i)*8]) \
+ : "memory" )
+
+#define W2(i) \
+ asm volatile ("movdqu %[w_t_m_2], %%xmm2;\n\t" \
+ "movdqa %%xmm2, %%xmm0;\n\t" \
+ "movdqu %[w_t_m_15], %%xmm5;\n\t" \
+ : \
+ : [w_t_m_2] "m" (w[(i)-2]), \
+ [w_t_m_15] "m" (w[(i)-15]) \
+ : "memory" ); \
+ asm volatile ("movdqa %%xmm5, %%xmm3;\n\t" \
+ "psrlq $(61-19), %%xmm0;\n\t" \
+ "psrlq $(8-7), %%xmm3;\n\t" \
+ "pxor %%xmm2, %%xmm0;\n\t" \
+ "pxor %%xmm5, %%xmm3;\n\t" \
+ "psrlq $(19-6), %%xmm0;\n\t" \
+ "psrlq $(7-1), %%xmm3;\n\t" \
+ "pxor %%xmm2, %%xmm0;\n\t" \
+ "pxor %%xmm5, %%xmm3;\n\t" \
+ "psrlq $6, %%xmm0;\n\t" \
+ "psrlq $1, %%xmm3;\n\t" \
+ "movdqa %%xmm2, %%xmm1;\n\t" \
+ "movdqa %%xmm5, %%xmm4;\n\t" \
+ "psllq $(61-19), %%xmm1;\n\t" \
+ "psllq $(8-1), %%xmm4;\n\t" \
+ "pxor %%xmm2, %%xmm1;\n\t" \
+ "pxor %%xmm5, %%xmm4;\n\t" \
+ "psllq $(64-61), %%xmm1;\n\t" \
+ "psllq $(64-8), %%xmm4;\n\t" \
+ "pxor %%xmm1, %%xmm0;\n\t" \
+ "movdqu %[w_t_m_16], %%xmm2;\n\t" \
+ "pxor %%xmm4, %%xmm3;\n\t" \
+ "movdqu %[w_t_m_7], %%xmm1;\n\t" \
+ : \
+ : [w_t_m_7] "m" (w[(i)-7]), \
+ [w_t_m_16] "m" (w[(i)-16]) \
+ : "memory" ); \
+ asm volatile ("paddq %%xmm3, %%xmm0;\n\t" \
+ "paddq %%xmm2, %%xmm0;\n\t" \
+ "paddq %%xmm1, %%xmm0;\n\t" \
+ "movdqu %%xmm0, %[w_t_m_0];\n\t" \
+ "paddq %[k], %%xmm0;\n\t" \
+ : [w_t_m_0] "=m" (w[(i)-0]) \
+ : [k] "m" (K[i]) \
+ : "memory" )
+
+unsigned int ASM_FUNC_ATTR
+_gcry_sha512_transform_i386_ssse3(u64 state[8], const unsigned char *data,
+ size_t nblks)
+{
+ unsigned int t;
+ u64 w[80];
+
+ /* Load state to MMX registers. */
+ asm volatile ("movq 8*0(%[state]), "RA";\n\t"
+ "movq 8*1(%[state]), "RB";\n\t"
+ "movq 8*2(%[state]), "RC";\n\t"
+ "movq 8*3(%[state]), "RD";\n\t"
+ "movq 8*4(%[state]), "RE";\n\t"
+ "movq 8*5(%[state]), "RF";\n\t"
+ "movq 8*6(%[state]), "RG";\n\t"
+ "movq 8*7(%[state]), "RH";\n\t"
+ :
+ : [state] "r" (state)
+ : "memory" );
+
+ asm volatile ("movdqa %[bshuf_mask], %%xmm6;\n\t"
+ :
+ : [bshuf_mask] "m" (*bshuf_mask)
+ : "memory" );
+
+ while (nblks)
+ {
+ I2(0);
+ Rx(RA, RB, RC, RD, RE, RF, RG, RH, WK0);
+ Rx(RH, RA, RB, RC, RD, RE, RF, RG, WK1);
+ I2(2);
+ Rx(RG, RH, RA, RB, RC, RD, RE, RF, WK0);
+ Rx(RF, RG, RH, RA, RB, RC, RD, RE, WK1);
+ I2(4);
+ Rx(RE, RF, RG, RH, RA, RB, RC, RD, WK0);
+ Rx(RD, RE, RF, RG, RH, RA, RB, RC, WK1);
+ I2(6);
+ Rx(RC, RD, RE, RF, RG, RH, RA, RB, WK0);
+ Rx(RB, RC, RD, RE, RF, RG, RH, RA, WK1);
+ I2(8);
+ Rx(RA, RB, RC, RD, RE, RF, RG, RH, WK0);
+ Rx(RH, RA, RB, RC, RD, RE, RF, RG, WK1);
+ I2(10);
+ Rx(RG, RH, RA, RB, RC, RD, RE, RF, WK0);
+ Rx(RF, RG, RH, RA, RB, RC, RD, RE, WK1);
+ I2(12);
+ Rx(RE, RF, RG, RH, RA, RB, RC, RD, WK0);
+ Rx(RD, RE, RF, RG, RH, RA, RB, RC, WK1);
+ I2(14);
+ Rx(RC, RD, RE, RF, RG, RH, RA, RB, WK0);
+ Rx(RB, RC, RD, RE, RF, RG, RH, RA, WK1);
+ data += 128;
+
+ for (t = 16; t < 80; t += 16)
+ {
+ W2(t + 0);
+ Rx(RA, RB, RC, RD, RE, RF, RG, RH, WK0);
+ Rx(RH, RA, RB, RC, RD, RE, RF, RG, WK1);
+ W2(t + 2);
+ Rx(RG, RH, RA, RB, RC, RD, RE, RF, WK0);
+ Rx(RF, RG, RH, RA, RB, RC, RD, RE, WK1);
+ W2(t + 4);
+ Rx(RE, RF, RG, RH, RA, RB, RC, RD, WK0);
+ Rx(RD, RE, RF, RG, RH, RA, RB, RC, WK1);
+ W2(t + 6);
+ Rx(RC, RD, RE, RF, RG, RH, RA, RB, WK0);
+ Rx(RB, RC, RD, RE, RF, RG, RH, RA, WK1);
+ W2(t + 8);
+ Rx(RA, RB, RC, RD, RE, RF, RG, RH, WK0);
+ Rx(RH, RA, RB, RC, RD, RE, RF, RG, WK1);
+ W2(t + 10);
+ Rx(RG, RH, RA, RB, RC, RD, RE, RF, WK0);
+ Rx(RF, RG, RH, RA, RB, RC, RD, RE, WK1);
+ W2(t + 12);
+ Rx(RE, RF, RG, RH, RA, RB, RC, RD, WK0);
+ Rx(RD, RE, RF, RG, RH, RA, RB, RC, WK1);
+ W2(t + 14);
+ Rx(RC, RD, RE, RF, RG, RH, RA, RB, WK0);
+ Rx(RB, RC, RD, RE, RF, RG, RH, RA, WK1);
+ }
+
+ asm volatile ("paddq 8*0(%[state]), "RA";\n\t"
+ "paddq 8*1(%[state]), "RB";\n\t"
+ "paddq 8*2(%[state]), "RC";\n\t"
+ "paddq 8*3(%[state]), "RD";\n\t"
+ "paddq 8*4(%[state]), "RE";\n\t"
+ "paddq 8*5(%[state]), "RF";\n\t"
+ "paddq 8*6(%[state]), "RG";\n\t"
+ "paddq 8*7(%[state]), "RH";\n\t"
+ "movq "RA", 8*0(%[state]);\n\t"
+ "movq "RB", 8*1(%[state]);\n\t"
+ "movq "RC", 8*2(%[state]);\n\t"
+ "movq "RD", 8*3(%[state]);\n\t"
+ "movq "RE", 8*4(%[state]);\n\t"
+ "movq "RF", 8*5(%[state]);\n\t"
+ "movq "RG", 8*6(%[state]);\n\t"
+ "movq "RH", 8*7(%[state]);\n\t"
+ :
+ : [state] "r" (state)
+ : "memory" );
+
+ nblks--;
+ }
+
+ /* Clear registers */
+ asm volatile ("pxor %%xmm0, %%xmm0;\n\t"
+ "pxor %%xmm1, %%xmm1;\n\t"
+ "pxor %%xmm2, %%xmm2;\n\t"
+ "pxor %%xmm3, %%xmm3;\n\t"
+ "pxor %%xmm4, %%xmm4;\n\t"
+ "pxor %%xmm5, %%xmm5;\n\t"
+ "pxor %%xmm6, %%xmm6;\n\t"
+ "pxor %%mm0, %%mm0;\n\t"
+ "pxor %%mm1, %%mm1;\n\t"
+ "pxor %%mm2, %%mm2;\n\t"
+ "pxor %%mm3, %%mm3;\n\t"
+ "pxor %%mm4, %%mm4;\n\t"
+ "pxor %%mm5, %%mm5;\n\t"
+ "pxor %%mm6, %%mm6;\n\t"
+ "pxor %%mm7, %%mm7;\n\t"
+ "emms;\n\t"
+ :
+ :
+ : "memory" );
+
+ return sizeof(w);
+}
+
+#if __clang__
+# pragma clang attribute pop
+# pragma clang attribute pop
+#endif
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/sha512.c b/comm/third_party/libgcrypt/cipher/sha512.c
new file mode 100644
index 0000000000..bc4657a8b9
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sha512.c
@@ -0,0 +1,1316 @@
+/* sha512.c - SHA384 and SHA512 hash functions
+ * Copyright (C) 2003, 2008, 2009 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/* Test vectors from FIPS-180-2:
+ *
+ * "abc"
+ * 384:
+ * CB00753F 45A35E8B B5A03D69 9AC65007 272C32AB 0EDED163
+ * 1A8B605A 43FF5BED 8086072B A1E7CC23 58BAECA1 34C825A7
+ * 512:
+ * DDAF35A1 93617ABA CC417349 AE204131 12E6FA4E 89A97EA2 0A9EEEE6 4B55D39A
+ * 2192992A 274FC1A8 36BA3C23 A3FEEBBD 454D4423 643CE80E 2A9AC94F A54CA49F
+ *
+ * "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"
+ * 384:
+ * 09330C33 F71147E8 3D192FC7 82CD1B47 53111B17 3B3B05D2
+ * 2FA08086 E3B0F712 FCC7C71A 557E2DB9 66C3E9FA 91746039
+ * 512:
+ * 8E959B75 DAE313DA 8CF4F728 14FC143F 8F7779C6 EB9F7FA1 7299AEAD B6889018
+ * 501D289E 4900F7E4 331B99DE C4B5433A C7D329EE B6DD2654 5E96E55B 874BE909
+ *
+ * "a" x 1000000
+ * 384:
+ * 9D0E1809 716474CB 086E834E 310A4A1C ED149E9C 00F24852
+ * 7972CEC5 704C2A5B 07B8B3DC 38ECC4EB AE97DDD8 7F3D8985
+ * 512:
+ * E718483D 0CE76964 4E2E42C7 BC15B463 8E1F98B1 3B204428 5632A803 AFA973EB
+ * DE0FF244 877EA60A 4CB0432C E577C31B EB009C5C 2C49AA2E 4EADB217 AD8CC09B
+ */
+
+
+#include <config.h>
+#include <string.h>
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+
+/* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code. */
+#undef USE_ARM_NEON_ASM
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_NEON)
+# define USE_ARM_NEON_ASM 1
+# endif
+#endif /*ENABLE_NEON_SUPPORT*/
+
+
+/* USE_ARM_ASM indicates whether to enable ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
+# define USE_ARM_ASM 1
+#endif
+
+
+/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
+#undef USE_SSSE3
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSSE3 1
+#endif
+
+
+/* USE_AVX indicates whether to compile with Intel AVX code. */
+#undef USE_AVX
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX 1
+#endif
+
+
+/* USE_AVX2 indicates whether to compile with Intel AVX2/rorx code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+ defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX2 1
+#endif
+
+
+/* USE_SSSE3_I386 indicates whether to compile with Intel SSSE3/i386 code. */
+#undef USE_SSSE3_I386
+#if defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4 && \
+ defined(HAVE_GCC_INLINE_ASM_SSSE3)
+# define USE_SSSE3_I386 1
+#endif
+
+
+/* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto
+ * accelerated code. */
+#undef USE_PPC_CRYPTO
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+# if __GNUC__ >= 4
+# define USE_PPC_CRYPTO 1
+# endif
+# endif
+#endif
+
+
+/* USE_S390X_CRYPTO indicates whether to enable zSeries code. */
+#undef USE_S390X_CRYPTO
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define USE_S390X_CRYPTO 1
+#endif /* USE_S390X_CRYPTO */
+
+
+typedef struct
+{
+ u64 h0, h1, h2, h3, h4, h5, h6, h7;
+} SHA512_STATE;
+
+typedef struct
+{
+ gcry_md_block_ctx_t bctx;
+ SHA512_STATE state;
+#ifdef USE_S390X_CRYPTO
+ u64 final_len_msb, final_len_lsb; /* needs to be right after state.h7. */
+ int use_s390x_crypto;
+#endif
+} SHA512_CONTEXT;
+
+
+static const u64 k[] =
+ {
+ U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
+ U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
+ U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
+ U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
+ U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
+ U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
+ U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
+ U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
+ U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
+ U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
+ U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
+ U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
+ U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
+ U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
+ U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
+ U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
+ U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
+ U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
+ U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
+ U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
+ U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
+ U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
+ U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
+ U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
+ U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
+ U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
+ U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
+ U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
+ U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
+ U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
+ U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
+ U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
+ U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
+ U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
+ U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
+ U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
+ U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
+ U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
+ U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
+ U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
+ };
+
+
+/* AMD64 assembly implementations use the SystemV ABI; on Win64 an ABI
+ * conversion and additional stack space to store XMM6-XMM15 are needed. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16 + 4 * sizeof(void *))
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
+#ifdef USE_ARM_NEON_ASM
+unsigned int _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
+ const unsigned char *data,
+ const u64 k[], size_t num_blks);
+
+static unsigned int
+do_sha512_transform_armv7_neon(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_armv7_neon (&hd->state, data, k, nblks);
+}
+#endif
+
+#ifdef USE_SSSE3
+unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
+ void *state,
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_ssse3(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_amd64_ssse3 (data, &hd->state, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_AVX
+unsigned int _gcry_sha512_transform_amd64_avx(const void *input_data,
+ void *state,
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_avx(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_amd64_avx (data, &hd->state, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_AVX2
+unsigned int _gcry_sha512_transform_amd64_avx2(const void *input_data,
+ void *state,
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_avx2(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_amd64_avx2 (data, &hd->state, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_SSSE3_I386
+unsigned int _gcry_sha512_transform_i386_ssse3(u64 state[8],
+ const unsigned char *input_data,
+ size_t num_blks);
+
+static unsigned int
+do_sha512_transform_i386_ssse3(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_i386_ssse3 (&hd->state.h0, data, nblks);
+}
+#endif
+
+
+#ifdef USE_ARM_ASM
+unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd,
+ const unsigned char *data,
+ const u64 k[], size_t num_blks);
+
+static unsigned int
+do_transform_generic (void *context, const unsigned char *data, size_t nblks)
+{
+ SHA512_CONTEXT *hd = context;
+ return _gcry_sha512_transform_arm (&hd->state, data, k, nblks);
+}
+#else
+static unsigned int
+do_transform_generic (void *context, const unsigned char *data, size_t nblks);
+#endif
+
+
+#ifdef USE_PPC_CRYPTO
+unsigned int _gcry_sha512_transform_ppc8(u64 state[8],
+ const unsigned char *input_data,
+ size_t num_blks);
+
+unsigned int _gcry_sha512_transform_ppc9(u64 state[8],
+ const unsigned char *input_data,
+ size_t num_blks);
+
+static unsigned int
+do_sha512_transform_ppc8(void *ctx, const unsigned char *data, size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_ppc8 (&hd->state.h0, data, nblks);
+}
+
+static unsigned int
+do_sha512_transform_ppc9(void *ctx, const unsigned char *data, size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_ppc9 (&hd->state.h0, data, nblks);
+}
+#endif
+
+
+#ifdef USE_S390X_CRYPTO
+#include "asm-inline-s390x.h"
+
+static unsigned int
+do_sha512_transform_s390x (void *ctx, const unsigned char *data, size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+
+ kimd_execute (KMID_FUNCTION_SHA512, &hd->state.h0, data, nblks * 128);
+ return 0;
+}
+
+static unsigned int
+do_sha512_final_s390x (void *ctx, const unsigned char *data, size_t datalen,
+ u64 len_msb, u64 len_lsb)
+{
+ SHA512_CONTEXT *hd = ctx;
+
+  /* Make sure that 'final_len' is positioned at the correct offset relative
+   * to 'state.h0', because the 'state.h0' pointer is passed as the start of
+   * the parameter block to the 'klmd' instruction. */
+
+ gcry_assert (offsetof (SHA512_CONTEXT, final_len_msb)
+ - offsetof (SHA512_CONTEXT, state.h0) == 8 * sizeof(u64));
+ gcry_assert (offsetof (SHA512_CONTEXT, final_len_lsb)
+ - offsetof (SHA512_CONTEXT, final_len_msb) == 1 * sizeof(u64));
+
+ hd->final_len_msb = len_msb;
+ hd->final_len_lsb = len_lsb;
+
+ klmd_execute (KMID_FUNCTION_SHA512, &hd->state.h0, data, datalen);
+ return 0;
+}
+#endif
+
+
+static void
+sha512_init_common (SHA512_CONTEXT *ctx, unsigned int flags)
+{
+ unsigned int features = _gcry_get_hw_features ();
+
+ (void)flags;
+ (void)k;
+
+ ctx->bctx.nblocks = 0;
+ ctx->bctx.nblocks_high = 0;
+ ctx->bctx.count = 0;
+ ctx->bctx.blocksize_shift = _gcry_ctz(128);
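+  /* _gcry_ctz(128) == 7; the SHA-512 block size is 128 bytes. */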
+
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ ctx->bctx.bwrite = do_transform_generic;
+#ifdef USE_ARM_NEON_ASM
+ if ((features & HWF_ARM_NEON) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_armv7_neon;
+#endif
+#ifdef USE_SSSE3
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_amd64_ssse3;
+#endif
+#ifdef USE_AVX
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx;
+#endif
+#ifdef USE_AVX2
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx2;
+#endif
+#ifdef USE_PPC_CRYPTO
+ if ((features & HWF_PPC_VCRYPTO) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_ppc8;
+ if ((features & HWF_PPC_VCRYPTO) != 0 && (features & HWF_PPC_ARCH_3_00) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_ppc9;
+#endif
+#ifdef USE_SSSE3_I386
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_i386_ssse3;
+#endif
+#ifdef USE_S390X_CRYPTO
+ ctx->use_s390x_crypto = 0;
+ if ((features & HWF_S390X_MSA) != 0)
+ {
+ if ((kimd_query () & km_function_to_mask (KMID_FUNCTION_SHA512)) &&
+ (klmd_query () & km_function_to_mask (KMID_FUNCTION_SHA512)))
+ {
+ ctx->bctx.bwrite = do_sha512_transform_s390x;
+ ctx->use_s390x_crypto = 1;
+ }
+ }
+#endif
+ (void)features;
+}
+
+
+static void
+sha512_init (void *context, unsigned int flags)
+{
+ SHA512_CONTEXT *ctx = context;
+ SHA512_STATE *hd = &ctx->state;
+
+ hd->h0 = U64_C(0x6a09e667f3bcc908);
+ hd->h1 = U64_C(0xbb67ae8584caa73b);
+ hd->h2 = U64_C(0x3c6ef372fe94f82b);
+ hd->h3 = U64_C(0xa54ff53a5f1d36f1);
+ hd->h4 = U64_C(0x510e527fade682d1);
+ hd->h5 = U64_C(0x9b05688c2b3e6c1f);
+ hd->h6 = U64_C(0x1f83d9abfb41bd6b);
+ hd->h7 = U64_C(0x5be0cd19137e2179);
+
+ sha512_init_common (ctx, flags);
+}
+
+static void
+sha384_init (void *context, unsigned int flags)
+{
+ SHA512_CONTEXT *ctx = context;
+ SHA512_STATE *hd = &ctx->state;
+
+ hd->h0 = U64_C(0xcbbb9d5dc1059ed8);
+ hd->h1 = U64_C(0x629a292a367cd507);
+ hd->h2 = U64_C(0x9159015a3070dd17);
+ hd->h3 = U64_C(0x152fecd8f70e5939);
+ hd->h4 = U64_C(0x67332667ffc00b31);
+ hd->h5 = U64_C(0x8eb44a8768581511);
+ hd->h6 = U64_C(0xdb0c2e0d64f98fa7);
+ hd->h7 = U64_C(0x47b5481dbefa4fa4);
+
+ sha512_init_common (ctx, flags);
+}
+
+
+static void
+sha512_256_init (void *context, unsigned int flags)
+{
+ SHA512_CONTEXT *ctx = context;
+ SHA512_STATE *hd = &ctx->state;
+
+ hd->h0 = U64_C(0x22312194fc2bf72c);
+ hd->h1 = U64_C(0x9f555fa3c84c64c2);
+ hd->h2 = U64_C(0x2393b86b6f53b151);
+ hd->h3 = U64_C(0x963877195940eabd);
+ hd->h4 = U64_C(0x96283ee2a88effe3);
+ hd->h5 = U64_C(0xbe5e1e2553863992);
+ hd->h6 = U64_C(0x2b0199fc2c85b8aa);
+ hd->h7 = U64_C(0x0eb72ddc81c52ca2);
+
+ sha512_init_common (ctx, flags);
+}
+
+
+static void
+sha512_224_init (void *context, unsigned int flags)
+{
+ SHA512_CONTEXT *ctx = context;
+ SHA512_STATE *hd = &ctx->state;
+
+ hd->h0 = U64_C(0x8c3d37c819544da2);
+ hd->h1 = U64_C(0x73e1996689dcd4d6);
+ hd->h2 = U64_C(0x1dfab7ae32ff9c82);
+ hd->h3 = U64_C(0x679dd514582f9fcf);
+ hd->h4 = U64_C(0x0f6d2b697bd44da8);
+ hd->h5 = U64_C(0x77e36f7304c48942);
+ hd->h6 = U64_C(0x3f9d85a86a1d36c8);
+ hd->h7 = U64_C(0x1112e6ad91d692a1);
+
+ sha512_init_common (ctx, flags);
+}
+
+
+
+#ifndef USE_ARM_ASM
+
+static inline u64
+ROTR (u64 x, u64 n)
+{
+ return ((x >> n) | (x << (64 - n)));
+}
+
+static inline u64
+Ch (u64 x, u64 y, u64 z)
+{
+ return ((x & y) ^ ( ~x & z));
+}
+
+static inline u64
+Maj (u64 x, u64 y, u64 z)
+{
+ return ((x & y) ^ (x & z) ^ (y & z));
+}
+
+static inline u64
+Sum0 (u64 x)
+{
+ return (ROTR (x, 28) ^ ROTR (x, 34) ^ ROTR (x, 39));
+}
+
+static inline u64
+Sum1 (u64 x)
+{
+ return (ROTR (x, 14) ^ ROTR (x, 18) ^ ROTR (x, 41));
+}
+
+/****************
+ * Transform the message data, which consists of blocks of 16 64-bit words.
+ */
+static unsigned int
+do_transform_generic (void *context, const unsigned char *data, size_t nblks)
+{
+ SHA512_CONTEXT *ctx = context;
+ SHA512_STATE *hd = &ctx->state;
+
+ do
+ {
+ u64 a, b, c, d, e, f, g, h;
+ u64 w[16];
+ int t;
+
+ /* get values from the chaining vars */
+ a = hd->h0;
+ b = hd->h1;
+ c = hd->h2;
+ d = hd->h3;
+ e = hd->h4;
+ f = hd->h5;
+ g = hd->h6;
+ h = hd->h7;
+
+ for ( t = 0; t < 16; t++ )
+ w[t] = buf_get_be64(data + t * 8);
+
+#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
+#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+
+ for (t = 0; t < 80 - 16; )
+ {
+ u64 t1, t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
+ w[0] += S1 (w[14]) + w[9] + S0 (w[1]);
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
+ w[1] += S1 (w[15]) + w[10] + S0 (w[2]);
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
+ w[2] += S1 (w[0]) + w[11] + S0 (w[3]);
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
+ w[3] += S1 (w[1]) + w[12] + S0 (w[4]);
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
+ w[4] += S1 (w[2]) + w[13] + S0 (w[5]);
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
+ w[5] += S1 (w[3]) + w[14] + S0 (w[6]);
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
+ w[6] += S1 (w[4]) + w[15] + S0 (w[7]);
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
+ w[7] += S1 (w[5]) + w[0] + S0 (w[8]);
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
+ w[8] += S1 (w[6]) + w[1] + S0 (w[9]);
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
+ w[9] += S1 (w[7]) + w[2] + S0 (w[10]);
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
+ w[10] += S1 (w[8]) + w[3] + S0 (w[11]);
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
+ w[11] += S1 (w[9]) + w[4] + S0 (w[12]);
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
+ w[12] += S1 (w[10]) + w[5] + S0 (w[13]);
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
+ w[13] += S1 (w[11]) + w[6] + S0 (w[14]);
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
+ w[14] += S1 (w[12]) + w[7] + S0 (w[15]);
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
+ w[15] += S1 (w[13]) + w[8] + S0 (w[0]);
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t += 16;
+ }
+
+ for (; t < 80; )
+ {
+ u64 t1, t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t += 16;
+ }
+
+ /* Update chaining vars. */
+ hd->h0 += a;
+ hd->h1 += b;
+ hd->h2 += c;
+ hd->h3 += d;
+ hd->h4 += e;
+ hd->h5 += f;
+ hd->h6 += g;
+ hd->h7 += h;
+
+ data += 128;
+ }
+ while (--nblks);
+
+ return (8 + 16) * sizeof(u64) + sizeof(u32) + 3 * sizeof(void*);
+}
+#endif /*!USE_ARM_ASM*/
+
+
+/* The final routine terminates the computation and
+ * returns the digest.
+ * The handle is prepared for a new cycle, but adding bytes to the
+ * handle will destroy the returned buffer.
+ * Returns: 64 bytes representing the digest. When used for sha384,
+ * we take the leftmost 48 of those bytes.
+ */
+
+static void
+sha512_final (void *context)
+{
+ SHA512_CONTEXT *hd = context;
+ unsigned int burn;
+ u64 t, th, msb, lsb;
+ byte *p;
+
+ t = hd->bctx.nblocks;
+ /* if (sizeof t == sizeof hd->bctx.nblocks) */
+ th = hd->bctx.nblocks_high;
+ /* else */
+ /* th = hd->bctx.nblocks >> 64; In case we ever use u128 */
+
+ /* multiply by 128 to make a byte count */
+ lsb = t << 7;
+ msb = (th << 7) | (t >> 57);
+ /* add the count */
+ t = lsb;
+ if ((lsb += hd->bctx.count) < t)
+ msb++;
+ /* multiply by 8 to make a bit count */
+ t = lsb;
+ lsb <<= 3;
+ msb <<= 3;
+ msb |= t >> 61;
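+  /* Example: nblocks = 2 and count = 5 gives lsb = 2*128 + 5 = 261 bytes,
+   * i.e. msb:lsb = 0:2088 bits after the shift by 3. */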
+
+ if (0)
+ { }
+#ifdef USE_S390X_CRYPTO
+ else if (hd->use_s390x_crypto)
+ {
+ burn = do_sha512_final_s390x (hd, hd->bctx.buf, hd->bctx.count, msb, lsb);
+ }
+#endif
+ else
+ {
+ if (hd->bctx.count < 112)
+ {
+ /* enough room */
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
+ if (hd->bctx.count < 112)
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 112 - hd->bctx.count);
+ }
+ else
+ {
+ /* need one extra block */
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
+ if (hd->bctx.count < 128)
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 128 - hd->bctx.count);
+ hd->bctx.count = 128;
+ _gcry_md_block_write (context, NULL, 0); /* flush */
+ memset (hd->bctx.buf, 0, 112); /* fill next block with zeroes */
+ }
+ /* append the 128 bit count */
+ buf_put_be64(hd->bctx.buf + 112, msb);
+ buf_put_be64(hd->bctx.buf + 120, lsb);
+ burn = (*hd->bctx.bwrite) (hd, hd->bctx.buf, 1);
+ }
+
+ p = hd->bctx.buf;
+#define X(a) do { buf_put_be64(p, hd->state.h##a); p += 8; } while (0)
+ X (0);
+ X (1);
+ X (2);
+ X (3);
+ X (4);
+ X (5);
+ /* Note that these last two chunks are included even for SHA384.
+ We just ignore them. */
+ X (6);
+ X (7);
+#undef X
+
+ hd->bctx.count = 0;
+
+ _gcry_burn_stack (burn);
+}
+
+static byte *
+sha512_read (void *context)
+{
+ SHA512_CONTEXT *hd = (SHA512_CONTEXT *) context;
+ return hd->bctx.buf;
+}
+
+
+/* Shortcut function which puts the hash value of the supplied buffer
+ * into outbuf, which must have a size of 64 bytes. */
+void
+_gcry_sha512_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+ SHA512_CONTEXT hd;
+
+ sha512_init (&hd, 0);
+ _gcry_md_block_write (&hd, buffer, length);
+ sha512_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 64);
+}
+
+
+/* Variant of the above shortcut function using multiple buffers. */
+void
+_gcry_sha512_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
+{
+ SHA512_CONTEXT hd;
+
+ sha512_init (&hd, 0);
+ for (;iovcnt > 0; iov++, iovcnt--)
+ _gcry_md_block_write (&hd,
+ (const char*)iov[0].data + iov[0].off, iov[0].len);
+ sha512_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 64);
+}
+
+
+
+/* Shortcut function which puts the hash value of the supplied buffer
+ * into outbuf, which must have a size of 48 bytes. */
+static void
+_gcry_sha384_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+ SHA512_CONTEXT hd;
+
+ sha384_init (&hd, 0);
+ _gcry_md_block_write (&hd, buffer, length);
+ sha512_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 48);
+}
+
+
+/* Variant of the above shortcut function using multiple buffers. */
+static void
+_gcry_sha384_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
+{
+ SHA512_CONTEXT hd;
+
+ sha384_init (&hd, 0);
+ for (;iovcnt > 0; iov++, iovcnt--)
+ _gcry_md_block_write (&hd,
+ (const char*)iov[0].data + iov[0].off, iov[0].len);
+ sha512_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 48);
+}
+
+
+
+/* Shortcut function which puts the hash value of the supplied buffer
+ * into outbuf, which must have a size of 32 bytes. */
+static void
+_gcry_sha512_256_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+ SHA512_CONTEXT hd;
+
+ sha512_256_init (&hd, 0);
+ _gcry_md_block_write (&hd, buffer, length);
+ sha512_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 32);
+}
+
+
+/* Variant of the above shortcut function using multiple buffers. */
+static void
+_gcry_sha512_256_hash_buffers (void *outbuf, const gcry_buffer_t *iov,
+ int iovcnt)
+{
+ SHA512_CONTEXT hd;
+
+ sha512_256_init (&hd, 0);
+ for (;iovcnt > 0; iov++, iovcnt--)
+ _gcry_md_block_write (&hd,
+ (const char*)iov[0].data + iov[0].off, iov[0].len);
+ sha512_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 32);
+}
+
+
+
+/* Shortcut function which puts the hash value of the supplied buffer
+ * into outbuf, which must have a size of 28 bytes. */
+static void
+_gcry_sha512_224_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+ SHA512_CONTEXT hd;
+
+ sha512_224_init (&hd, 0);
+ _gcry_md_block_write (&hd, buffer, length);
+ sha512_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 28);
+}
+
+
+/* Variant of the above shortcut function using multiple buffers. */
+static void
+_gcry_sha512_224_hash_buffers (void *outbuf, const gcry_buffer_t *iov,
+ int iovcnt)
+{
+ SHA512_CONTEXT hd;
+
+ sha512_224_init (&hd, 0);
+ for (;iovcnt > 0; iov++, iovcnt--)
+ _gcry_md_block_write (&hd,
+ (const char*)iov[0].data + iov[0].off, iov[0].len);
+ sha512_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 28);
+}
+
+
+
+/*
+ Self-test section.
+ */
+
+
+static gpg_err_code_t
+selftests_sha384 (int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+
+ what = "short string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA384, 0,
+ "abc", 3,
+ "\xcb\x00\x75\x3f\x45\xa3\x5e\x8b\xb5\xa0\x3d\x69\x9a\xc6\x50\x07"
+ "\x27\x2c\x32\xab\x0e\xde\xd1\x63\x1a\x8b\x60\x5a\x43\xff\x5b\xed"
+ "\x80\x86\x07\x2b\xa1\xe7\xcc\x23\x58\xba\xec\xa1\x34\xc8\x25\xa7", 48);
+ if (errtxt)
+ goto failed;
+
+ if (extended)
+ {
+ what = "long string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA384, 0,
+ "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
+ "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
+ "\x09\x33\x0C\x33\xF7\x11\x47\xE8\x3D\x19\x2F\xC7\x82\xCD\x1B\x47"
+ "\x53\x11\x1B\x17\x3B\x3B\x05\xD2\x2F\xA0\x80\x86\xE3\xB0\xF7\x12"
+ "\xFC\xC7\xC7\x1A\x55\x7E\x2D\xB9\x66\xC3\xE9\xFA\x91\x74\x60\x39",
+ 48);
+ if (errtxt)
+ goto failed;
+
+ what = "one million \"a\"";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA384, 1,
+ NULL, 0,
+ "\x9D\x0E\x18\x09\x71\x64\x74\xCB\x08\x6E\x83\x4E\x31\x0A\x4A\x1C"
+ "\xED\x14\x9E\x9C\x00\xF2\x48\x52\x79\x72\xCE\xC5\x70\x4C\x2A\x5B"
+ "\x07\xB8\xB3\xDC\x38\xEC\xC4\xEB\xAE\x97\xDD\xD8\x7F\x3D\x89\x85",
+ 48);
+ if (errtxt)
+ goto failed;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("digest", GCRY_MD_SHA384, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+static gpg_err_code_t
+selftests_sha512 (int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+
+ what = "short string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA512, 0,
+ "abc", 3,
+ "\xDD\xAF\x35\xA1\x93\x61\x7A\xBA\xCC\x41\x73\x49\xAE\x20\x41\x31"
+ "\x12\xE6\xFA\x4E\x89\xA9\x7E\xA2\x0A\x9E\xEE\xE6\x4B\x55\xD3\x9A"
+ "\x21\x92\x99\x2A\x27\x4F\xC1\xA8\x36\xBA\x3C\x23\xA3\xFE\xEB\xBD"
+ "\x45\x4D\x44\x23\x64\x3C\xE8\x0E\x2A\x9A\xC9\x4F\xA5\x4C\xA4\x9F", 64);
+ if (errtxt)
+ goto failed;
+
+ if (extended)
+ {
+ what = "long string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA512, 0,
+ "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
+ "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
+ "\x8E\x95\x9B\x75\xDA\xE3\x13\xDA\x8C\xF4\xF7\x28\x14\xFC\x14\x3F"
+ "\x8F\x77\x79\xC6\xEB\x9F\x7F\xA1\x72\x99\xAE\xAD\xB6\x88\x90\x18"
+ "\x50\x1D\x28\x9E\x49\x00\xF7\xE4\x33\x1B\x99\xDE\xC4\xB5\x43\x3A"
+ "\xC7\xD3\x29\xEE\xB6\xDD\x26\x54\x5E\x96\xE5\x5B\x87\x4B\xE9\x09",
+ 64);
+ if (errtxt)
+ goto failed;
+
+ what = "one million \"a\"";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA512, 1,
+ NULL, 0,
+ "\xE7\x18\x48\x3D\x0C\xE7\x69\x64\x4E\x2E\x42\xC7\xBC\x15\xB4\x63"
+ "\x8E\x1F\x98\xB1\x3B\x20\x44\x28\x56\x32\xA8\x03\xAF\xA9\x73\xEB"
+ "\xDE\x0F\xF2\x44\x87\x7E\xA6\x0A\x4C\xB0\x43\x2C\xE5\x77\xC3\x1B"
+ "\xEB\x00\x9C\x5C\x2C\x49\xAA\x2E\x4E\xAD\xB2\x17\xAD\x8C\xC0\x9B",
+ 64);
+ if (errtxt)
+ goto failed;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("digest", GCRY_MD_SHA512, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+static gpg_err_code_t
+selftests_sha512_224 (int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+
+ what = "short string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA512_224, 0,
+ "abc", 3,
+ "\x46\x34\x27\x0F\x70\x7B\x6A\x54\xDA\xAE\x75\x30\x46\x08\x42\xE2"
+ "\x0E\x37\xED\x26\x5C\xEE\xE9\xA4\x3E\x89\x24\xAA",
+ 28);
+ if (errtxt)
+ goto failed;
+
+ if (extended)
+ {
+ what = "long string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA512_224, 0,
+ "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
+ "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
+ "\x23\xFE\xC5\xBB\x94\xD6\x0B\x23\x30\x81\x92\x64\x0B\x0C\x45\x33"
+ "\x35\xD6\x64\x73\x4F\xE4\x0E\x72\x68\x67\x4A\xF9",
+ 28);
+ if (errtxt)
+ goto failed;
+
+ what = "one million \"a\"";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA512_224, 1,
+ NULL, 0,
+ "\x37\xab\x33\x1d\x76\xf0\xd3\x6d\xe4\x22\xbd\x0e\xde\xb2\x2a\x28"
+ "\xac\xcd\x48\x7b\x7a\x84\x53\xae\x96\x5d\xd2\x87",
+ 28);
+ if (errtxt)
+ goto failed;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("digest", GCRY_MD_SHA512_224, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+static gpg_err_code_t
+selftests_sha512_256 (int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+
+ what = "short string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA512_256, 0,
+ "abc", 3,
+ "\x53\x04\x8E\x26\x81\x94\x1E\xF9\x9B\x2E\x29\xB7\x6B\x4C\x7D\xAB"
+ "\xE4\xC2\xD0\xC6\x34\xFC\x6D\x46\xE0\xE2\xF1\x31\x07\xE7\xAF\x23",
+ 32);
+ if (errtxt)
+ goto failed;
+
+ if (extended)
+ {
+ what = "long string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA512_256, 0,
+ "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
+ "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
+ "\x39\x28\xE1\x84\xFB\x86\x90\xF8\x40\xDA\x39\x88\x12\x1D\x31\xBE"
+ "\x65\xCB\x9D\x3E\xF8\x3E\xE6\x14\x6F\xEA\xC8\x61\xE1\x9B\x56\x3A",
+ 32);
+ if (errtxt)
+ goto failed;
+
+ what = "one million \"a\"";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SHA512_256, 1,
+ NULL, 0,
+ "\x9a\x59\xa0\x52\x93\x01\x87\xa9\x70\x38\xca\xe6\x92\xf3\x07\x08"
+ "\xaa\x64\x91\x92\x3e\xf5\x19\x43\x94\xdc\x68\xd5\x6c\x74\xfb\x21",
+ 32);
+ if (errtxt)
+ goto failed;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("digest", GCRY_MD_SHA512_256, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Run a full self-test for ALGO and return 0 on success. */
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+ gpg_err_code_t ec;
+
+ switch (algo)
+ {
+ case GCRY_MD_SHA384:
+ ec = selftests_sha384 (extended, report);
+ break;
+ case GCRY_MD_SHA512:
+ ec = selftests_sha512 (extended, report);
+ break;
+ case GCRY_MD_SHA512_224:
+ ec = selftests_sha512_224 (extended, report);
+ break;
+ case GCRY_MD_SHA512_256:
+ ec = selftests_sha512_256 (extended, report);
+ break;
+ default:
+ ec = GPG_ERR_DIGEST_ALGO;
+ break;
+
+ }
+ return ec;
+}
+
+
+
+
+static byte sha512_asn[] = /* Object ID is 2.16.840.1.101.3.4.2.3 */
+ {
+ 0x30, 0x51, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
+ 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, 0x05,
+ 0x00, 0x04, 0x40
+ };
+
+static gcry_md_oid_spec_t oid_spec_sha512[] =
+ {
+ { "2.16.840.1.101.3.4.2.3" },
+
+ /* PKCS#1 sha512WithRSAEncryption */
+ { "1.2.840.113549.1.1.13" },
+
+ { NULL }
+ };
+
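+/* The positional initializers in the digest specs below follow the
+ * gcry_md_spec_t layout (roughly): algorithm id, flags {disabled, fips},
+ * name, ASN.1 DER prefix and its length, OID list, digest length in
+ * bytes, then the init/write/final/read hooks, the extract hook (unused
+ * here), the one-shot hash_buffer and hash_buffers helpers, the context
+ * size, and the selftest callback. */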
+gcry_md_spec_t _gcry_digest_spec_sha512 =
+ {
+ GCRY_MD_SHA512, {0, 1},
+ "SHA512", sha512_asn, DIM (sha512_asn), oid_spec_sha512, 64,
+ sha512_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
+ _gcry_sha512_hash_buffer, _gcry_sha512_hash_buffers,
+ sizeof (SHA512_CONTEXT),
+ run_selftests
+ };
+
+static byte sha384_asn[] = /* Object ID is 2.16.840.1.101.3.4.2.2 */
+ {
+ 0x30, 0x41, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
+ 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02, 0x05,
+ 0x00, 0x04, 0x30
+ };
+
+static gcry_md_oid_spec_t oid_spec_sha384[] =
+ {
+ { "2.16.840.1.101.3.4.2.2" },
+
+ /* PKCS#1 sha384WithRSAEncryption */
+ { "1.2.840.113549.1.1.12" },
+
+ /* SHA384WithECDSA: RFC 7427 (A.3.3.) */
+ { "1.2.840.10045.4.3.3" },
+
+ { NULL },
+ };
+
+gcry_md_spec_t _gcry_digest_spec_sha384 =
+ {
+ GCRY_MD_SHA384, {0, 1},
+ "SHA384", sha384_asn, DIM (sha384_asn), oid_spec_sha384, 48,
+ sha384_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
+ _gcry_sha384_hash_buffer, _gcry_sha384_hash_buffers,
+ sizeof (SHA512_CONTEXT),
+ run_selftests
+ };
+
+static byte sha512_256_asn[] = { 0x30 };
+
+static gcry_md_oid_spec_t oid_spec_sha512_256[] =
+ {
+ { "2.16.840.1.101.3.4.2.6" },
+
+ { NULL },
+ };
+
+gcry_md_spec_t _gcry_digest_spec_sha512_256 =
+ {
+ GCRY_MD_SHA512_256, {0, 1},
+ "SHA512_256", sha512_256_asn, DIM (sha512_256_asn), oid_spec_sha512_256, 32,
+ sha512_256_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
+ _gcry_sha512_256_hash_buffer, _gcry_sha512_256_hash_buffers,
+ sizeof (SHA512_CONTEXT),
+ run_selftests
+ };
+
+static byte sha512_224_asn[] = { 0x30 };
+
+static gcry_md_oid_spec_t oid_spec_sha512_224[] =
+ {
+ { "2.16.840.1.101.3.4.2.5" },
+
+ { NULL },
+ };
+
+gcry_md_spec_t _gcry_digest_spec_sha512_224 =
+ {
+ GCRY_MD_SHA512_224, {0, 1},
+ "SHA512_224", sha512_224_asn, DIM (sha512_224_asn), oid_spec_sha512_224, 28,
+ sha512_224_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
+ _gcry_sha512_224_hash_buffer, _gcry_sha512_224_hash_buffers,
+ sizeof (SHA512_CONTEXT),
+ run_selftests
+ };
diff --git a/comm/third_party/libgcrypt/cipher/sm3.c b/comm/third_party/libgcrypt/cipher/sm3.c
new file mode 100644
index 0000000000..0f9bae3bf5
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sm3.c
@@ -0,0 +1,473 @@
+/* sm3.c - SM3 hash function
+ * Copyright (C) 2017 Jia Zhang
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/* Test vectors:
+
+ "abc"
+ SM3: 66c7f0f4 62eeedd9 d1f2d46b dc10e4e2 4167c487 5cf2f7a2 297da02b 8f4ba8e0
+
+ "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+ SM3: debe9ff9 2275b8a1 38604889 c18e5a4d 6fdb70e5 387e5765 293dcba3 9c0c5732
+
+ "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+ SM3: 639b6cc5 e64d9e37 a390b192 df4fa1ea 0720ab74 7ff692b9 f38c4e66 ad7b8c05
+
+ "a" one million times
+ SM3: c8aaf894 29554029 e231941a 2acc0ad6 1ff2a5ac d8fadd25 847a3a73 2b3b02c3
+
+ */
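+
+/* For a quick cross-check, the "abc" vector above can be reproduced
+   through the public API (illustrative sketch only):
+
+     unsigned char digest[32];
+     gcry_md_hash_buffer (GCRY_MD_SM3, digest, "abc", 3);
+
+   which should yield 66c7f0f4 62eeedd9 ... 8f4ba8e0.  */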
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+
+typedef struct {
+ gcry_md_block_ctx_t bctx;
+ u32 h0,h1,h2,h3,h4,h5,h6,h7;
+} SM3_CONTEXT;
+
+
+static unsigned int
+transform (void *c, const unsigned char *data, size_t nblks);
+
+
+static void
+sm3_init (void *context, unsigned int flags)
+{
+ SM3_CONTEXT *hd = context;
+ unsigned int features = _gcry_get_hw_features ();
+
+ (void)flags;
+
+ hd->h0 = 0x7380166f;
+ hd->h1 = 0x4914b2b9;
+ hd->h2 = 0x172442d7;
+ hd->h3 = 0xda8a0600;
+ hd->h4 = 0xa96f30bc;
+ hd->h5 = 0x163138aa;
+ hd->h6 = 0xe38dee4d;
+ hd->h7 = 0xb0fb0e4e;
+
+ hd->bctx.nblocks = 0;
+ hd->bctx.nblocks_high = 0;
+ hd->bctx.count = 0;
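+  /* The 64-byte block size is stored as a shift count (_gcry_ctz(64) == 6). */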
+ hd->bctx.blocksize_shift = _gcry_ctz(64);
+ hd->bctx.bwrite = transform;
+
+ (void)features;
+}
+
+
+/*
+  Transform the message X which consists of 16 32-bit words.  See
+  GM/T 0004-2012 for details.  */
+#define R(i,a,b,c,d,e,f,g,h,t,w1,w2) do \
+ { \
+ ss1 = rol ((rol ((a), 12) + (e) + (t)), 7); \
+ ss2 = ss1 ^ rol ((a), 12); \
+ d += FF##i(a,b,c) + ss2 + ((w1) ^ (w2)); \
+ h += GG##i(e,f,g) + ss1 + (w1); \
+ b = rol ((b), 9); \
+ f = rol ((f), 19); \
+ h = P0 ((h)); \
+ } while (0)
+
+#define R1(a,b,c,d,e,f,g,h,t,w1,w2) R(1,a,b,c,d,e,f,g,h,t,w1,w2)
+#define R2(a,b,c,d,e,f,g,h,t,w1,w2) R(2,a,b,c,d,e,f,g,h,t,w1,w2)
+
+#define FF1(x, y, z) (x ^ y ^ z)
+
+#define FF2(x, y, z) ((x & y) | (x & z) | (y & z))
+
+#define GG1(x, y, z) (x ^ y ^ z)
+
+#define GG2(x, y, z) ((x & y) | ( ~x & z))
+
+/* Permutations and message expansion */
+#define P0(x) ((x) ^ rol ((x), 9) ^ rol ((x), 17))
+#define P1(x) ((x) ^ rol ((x), 15) ^ rol ((x), 23))
+#define I(i) ( w[i] = buf_get_be32(data + i * 4) )
+#define W1(i) ( w[i&0x0f] )
+#define W2(i) ( w[i&0x0f] = P1(w[i &0x0f] \
+ ^ w[(i-9)&0x0f] \
+ ^ rol (w[(i-3)&0x0f], 15)) \
+ ^ rol (w[(i-13)&0x0f], 7) \
+ ^ w[(i-6)&0x0f] )
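+
+/* W1(i) reads an already expanded word from the 16-entry ring buffer,
+   while W2(i) expands a new one in place, implementing the schedule
+   W[j] = P1(W[j-16] ^ W[j-9] ^ rol(W[j-3], 15)) ^ rol(W[j-13], 7) ^ W[j-6]
+   from GM/T 0004-2012 with all indices reduced mod 16.  The (w1) ^ (w2)
+   term in R() forms W'[j] = W[j] ^ W[j+4] for the compression step.  */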
+
+static unsigned int
+transform_blk (void *ctx, const unsigned char *data)
+{
+ SM3_CONTEXT *hd = ctx;
+ static const u32 K[64] = {
+ 0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb,
+ 0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc,
+ 0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce,
+ 0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6,
+ 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c,
+ 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce,
+ 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec,
+ 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5,
+ 0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53,
+ 0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d,
+ 0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4,
+ 0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43,
+ 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c,
+ 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce,
+ 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec,
+ 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
+ };
+
+ u32 a,b,c,d,e,f,g,h,ss1,ss2;
+ u32 w[16];
+
+ a = hd->h0;
+ b = hd->h1;
+ c = hd->h2;
+ d = hd->h3;
+ e = hd->h4;
+ f = hd->h5;
+ g = hd->h6;
+ h = hd->h7;
+
+ R1(a, b, c, d, e, f, g, h, K[0], I(0), I(4));
+ R1(d, a, b, c, h, e, f, g, K[1], I(1), I(5));
+ R1(c, d, a, b, g, h, e, f, K[2], I(2), I(6));
+ R1(b, c, d, a, f, g, h, e, K[3], I(3), I(7));
+ R1(a, b, c, d, e, f, g, h, K[4], W1(4), I(8));
+ R1(d, a, b, c, h, e, f, g, K[5], W1(5), I(9));
+ R1(c, d, a, b, g, h, e, f, K[6], W1(6), I(10));
+ R1(b, c, d, a, f, g, h, e, K[7], W1(7), I(11));
+ R1(a, b, c, d, e, f, g, h, K[8], W1(8), I(12));
+ R1(d, a, b, c, h, e, f, g, K[9], W1(9), I(13));
+ R1(c, d, a, b, g, h, e, f, K[10], W1(10), I(14));
+ R1(b, c, d, a, f, g, h, e, K[11], W1(11), I(15));
+ R1(a, b, c, d, e, f, g, h, K[12], W1(12), W2(16));
+ R1(d, a, b, c, h, e, f, g, K[13], W1(13), W2(17));
+ R1(c, d, a, b, g, h, e, f, K[14], W1(14), W2(18));
+ R1(b, c, d, a, f, g, h, e, K[15], W1(15), W2(19));
+
+ R2(a, b, c, d, e, f, g, h, K[16], W1(16), W2(20));
+ R2(d, a, b, c, h, e, f, g, K[17], W1(17), W2(21));
+ R2(c, d, a, b, g, h, e, f, K[18], W1(18), W2(22));
+ R2(b, c, d, a, f, g, h, e, K[19], W1(19), W2(23));
+ R2(a, b, c, d, e, f, g, h, K[20], W1(20), W2(24));
+ R2(d, a, b, c, h, e, f, g, K[21], W1(21), W2(25));
+ R2(c, d, a, b, g, h, e, f, K[22], W1(22), W2(26));
+ R2(b, c, d, a, f, g, h, e, K[23], W1(23), W2(27));
+ R2(a, b, c, d, e, f, g, h, K[24], W1(24), W2(28));
+ R2(d, a, b, c, h, e, f, g, K[25], W1(25), W2(29));
+ R2(c, d, a, b, g, h, e, f, K[26], W1(26), W2(30));
+ R2(b, c, d, a, f, g, h, e, K[27], W1(27), W2(31));
+ R2(a, b, c, d, e, f, g, h, K[28], W1(28), W2(32));
+ R2(d, a, b, c, h, e, f, g, K[29], W1(29), W2(33));
+ R2(c, d, a, b, g, h, e, f, K[30], W1(30), W2(34));
+ R2(b, c, d, a, f, g, h, e, K[31], W1(31), W2(35));
+
+ R2(a, b, c, d, e, f, g, h, K[32], W1(32), W2(36));
+ R2(d, a, b, c, h, e, f, g, K[33], W1(33), W2(37));
+ R2(c, d, a, b, g, h, e, f, K[34], W1(34), W2(38));
+ R2(b, c, d, a, f, g, h, e, K[35], W1(35), W2(39));
+ R2(a, b, c, d, e, f, g, h, K[36], W1(36), W2(40));
+ R2(d, a, b, c, h, e, f, g, K[37], W1(37), W2(41));
+ R2(c, d, a, b, g, h, e, f, K[38], W1(38), W2(42));
+ R2(b, c, d, a, f, g, h, e, K[39], W1(39), W2(43));
+ R2(a, b, c, d, e, f, g, h, K[40], W1(40), W2(44));
+ R2(d, a, b, c, h, e, f, g, K[41], W1(41), W2(45));
+ R2(c, d, a, b, g, h, e, f, K[42], W1(42), W2(46));
+ R2(b, c, d, a, f, g, h, e, K[43], W1(43), W2(47));
+ R2(a, b, c, d, e, f, g, h, K[44], W1(44), W2(48));
+ R2(d, a, b, c, h, e, f, g, K[45], W1(45), W2(49));
+ R2(c, d, a, b, g, h, e, f, K[46], W1(46), W2(50));
+ R2(b, c, d, a, f, g, h, e, K[47], W1(47), W2(51));
+
+ R2(a, b, c, d, e, f, g, h, K[48], W1(48), W2(52));
+ R2(d, a, b, c, h, e, f, g, K[49], W1(49), W2(53));
+ R2(c, d, a, b, g, h, e, f, K[50], W1(50), W2(54));
+ R2(b, c, d, a, f, g, h, e, K[51], W1(51), W2(55));
+ R2(a, b, c, d, e, f, g, h, K[52], W1(52), W2(56));
+ R2(d, a, b, c, h, e, f, g, K[53], W1(53), W2(57));
+ R2(c, d, a, b, g, h, e, f, K[54], W1(54), W2(58));
+ R2(b, c, d, a, f, g, h, e, K[55], W1(55), W2(59));
+ R2(a, b, c, d, e, f, g, h, K[56], W1(56), W2(60));
+ R2(d, a, b, c, h, e, f, g, K[57], W1(57), W2(61));
+ R2(c, d, a, b, g, h, e, f, K[58], W1(58), W2(62));
+ R2(b, c, d, a, f, g, h, e, K[59], W1(59), W2(63));
+ R2(a, b, c, d, e, f, g, h, K[60], W1(60), W2(64));
+ R2(d, a, b, c, h, e, f, g, K[61], W1(61), W2(65));
+ R2(c, d, a, b, g, h, e, f, K[62], W1(62), W2(66));
+ R2(b, c, d, a, f, g, h, e, K[63], W1(63), W2(67));
+
+ hd->h0 ^= a;
+ hd->h1 ^= b;
+ hd->h2 ^= c;
+ hd->h3 ^= d;
+ hd->h4 ^= e;
+ hd->h5 ^= f;
+ hd->h6 ^= g;
+ hd->h7 ^= h;
+
+ return /*burn_stack*/ 26*4+32;
+}
+#undef P0
+#undef P1
+#undef R
+#undef R1
+#undef R2
+
+static unsigned int
+transform (void *ctx, const unsigned char *data, size_t nblks)
+{
+ SM3_CONTEXT *hd = ctx;
+ unsigned int burn;
+
+ do
+ {
+ burn = transform_blk (hd, data);
+ data += 64;
+ }
+ while (--nblks);
+
+ return burn;
+}
+
+
+/*
+   The routine finally terminates the computation and returns the
+   digest.  The handle is prepared for a new cycle, but adding bytes
+   to the handle will destroy the returned buffer.  Returns: 32
+   bytes with the message digest.  */
+static void
+sm3_final(void *context)
+{
+ SM3_CONTEXT *hd = context;
+ u32 t, th, msb, lsb;
+ byte *p;
+ unsigned int burn;
+
+ t = hd->bctx.nblocks;
+ if (sizeof t == sizeof hd->bctx.nblocks)
+ th = hd->bctx.nblocks_high;
+ else
+ th = hd->bctx.nblocks >> 32;
+
+ /* multiply by 64 to make a byte count */
+ lsb = t << 6;
+ msb = (th << 6) | (t >> 26);
+ /* add the count */
+ t = lsb;
+ if ((lsb += hd->bctx.count) < t)
+ msb++;
+ /* multiply by 8 to make a bit count */
+ t = lsb;
+ lsb <<= 3;
+ msb <<= 3;
+ msb |= t >> 29;
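+  /* Example: after one full 64-byte block with 3 bytes still buffered,
+     t = 1 gives lsb = 64, then lsb += 3 -> 67, and the shifts above
+     leave the 536-bit message length in msb:lsb. */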
+
+ if (hd->bctx.count < 56) /* enough room */
+ {
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
+ if (hd->bctx.count < 56)
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+ /* append the 64 bit count */
+ buf_put_be32(hd->bctx.buf + 56, msb);
+ buf_put_be32(hd->bctx.buf + 60, lsb);
+ burn = (*hd->bctx.bwrite) ( hd, hd->bctx.buf, 1 );
+ }
+ else /* need one extra block */
+ {
+ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
+ /* fill pad and next block with zeroes */
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+ /* append the 64 bit count */
+ buf_put_be32(hd->bctx.buf + 64 + 56, msb);
+ buf_put_be32(hd->bctx.buf + 64 + 60, lsb);
+ burn = (*hd->bctx.bwrite) ( hd, hd->bctx.buf, 2 );
+ }
+
+ p = hd->bctx.buf;
+#define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0)
+ X(0);
+ X(1);
+ X(2);
+ X(3);
+ X(4);
+ X(5);
+ X(6);
+ X(7);
+#undef X
+
+ hd->bctx.count = 0;
+
+ _gcry_burn_stack (burn);
+}
+
+static byte *
+sm3_read (void *context)
+{
+ SM3_CONTEXT *hd = context;
+
+ return hd->bctx.buf;
+}
+
+
+/* Shortcut function which puts the hash value of the supplied buffer
+ * into outbuf which must have a size of 32 bytes. */
+void
+_gcry_sm3_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+ SM3_CONTEXT hd;
+
+ sm3_init (&hd, 0);
+ _gcry_md_block_write (&hd, buffer, length);
+ sm3_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 32);
+}
+
+
+/* Variant of the above shortcut function using multiple buffers. */
+void
+_gcry_sm3_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
+{
+ SM3_CONTEXT hd;
+
+ sm3_init (&hd, 0);
+ for (;iovcnt > 0; iov++, iovcnt--)
+ _gcry_md_block_write (&hd,
+ (const char*)iov[0].data + iov[0].off, iov[0].len);
+ sm3_final (&hd);
+ memcpy (outbuf, hd.bctx.buf, 32);
+}
+
+
+
+/*
+ Self-test section.
+ */
+
+
+static gpg_err_code_t
+selftests_sm3 (int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+
+ what = "short string (spec example 1)";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SM3, 0,
+ "abc", 3,
+ "\x66\xc7\xf0\xf4\x62\xee\xed\xd9\xd1\xf2\xd4\x6b\xdc\x10\xe4\xe2"
+ "\x41\x67\xc4\x87\x5c\xf2\xf7\xa2\x29\x7d\xa0\x2b\x8f\x4b\xa8\xe0", 32);
+ if (errtxt)
+ goto failed;
+
+ if (extended)
+ {
+ what = "long string (spec example 2)";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SM3, 0,
+ "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd", 64,
+ "\xde\xbe\x9f\xf9\x22\x75\xb8\xa1\x38\x60\x48\x89\xc1\x8e\x5a\x4d"
+ "\x6f\xdb\x70\xe5\x38\x7e\x57\x65\x29\x3d\xcb\xa3\x9c\x0c\x57\x32",
+ 32);
+ if (errtxt)
+ goto failed;
+
+ what = "long string";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SM3, 0,
+ "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
+ "\x63\x9b\x6c\xc5\xe6\x4d\x9e\x37\xa3\x90\xb1\x92\xdf\x4f\xa1\xea"
+ "\x07\x20\xab\x74\x7f\xf6\x92\xb9\xf3\x8c\x4e\x66\xad\x7b\x8c\x05",
+ 32);
+ if (errtxt)
+ goto failed;
+
+ what = "one million \"a\"";
+ errtxt = _gcry_hash_selftest_check_one
+ (GCRY_MD_SM3, 1,
+ NULL, 0,
+ "\xc8\xaa\xf8\x94\x29\x55\x40\x29\xe2\x31\x94\x1a\x2a\xcc\x0a\xd6"
+ "\x1f\xf2\xa5\xac\xd8\xfa\xdd\x25\x84\x7a\x3a\x73\x2b\x3b\x02\xc3",
+ 32);
+ if (errtxt)
+ goto failed;
+ }
+
+ return 0; /* Succeeded. */
+
+ failed:
+ if (report)
+ report ("digest", GCRY_MD_SM3, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Run a full self-test for ALGO and return 0 on success. */
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+ gpg_err_code_t ec;
+
+ switch (algo)
+ {
+ case GCRY_MD_SM3:
+ ec = selftests_sm3 (extended, report);
+ break;
+ default:
+ ec = GPG_ERR_DIGEST_ALGO;
+ break;
+
+ }
+ return ec;
+}
+
+static byte asn_sm3[] = /* Object ID is 1.2.156.10197.401 */
+ { 0x30, 0x2F, 0x30, 0x0B, 0x06, 0x07, 0x2A, 0x81,
+ 0x1C, 0xCF, 0x55, 0x83, 0x11, 0x05, 0x00, 0x04,
+ 0x20 };
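+
+/* The DER prefix above decodes as: SEQUENCE (47 bytes) { SEQUENCE
+   (11 bytes) { OID 1.2.156.10197.401, NULL }, OCTET STRING (32 bytes
+   follow) }, i.e. a DigestInfo header for a 32-byte SM3 value.  */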
+
+static gcry_md_oid_spec_t oid_spec_sm3[] =
+  {
+    /* China Electronics Standardization Institute,
+       OID White paper (2015), Table 6 */
+    { "1.2.156.10197.401" },
+ { NULL },
+ };
+
+gcry_md_spec_t _gcry_digest_spec_sm3 =
+ {
+ GCRY_MD_SM3, {0, 0},
+ "SM3", asn_sm3, DIM (asn_sm3), oid_spec_sm3, 32,
+ sm3_init, _gcry_md_block_write, sm3_final, sm3_read, NULL,
+ _gcry_sm3_hash_buffer, _gcry_sm3_hash_buffers,
+ sizeof (SM3_CONTEXT),
+ run_selftests
+ };
diff --git a/comm/third_party/libgcrypt/cipher/sm4-aesni-avx-amd64.S b/comm/third_party/libgcrypt/cipher/sm4-aesni-avx-amd64.S
new file mode 100644
index 0000000000..3610b98c67
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sm4-aesni-avx-amd64.S
@@ -0,0 +1,987 @@
+/* sm4-aesni-avx-amd64.S - AES-NI/AVX implementation of SM4 cipher
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Based on SM4 AES-NI work by Markku-Juhani O. Saarinen at:
+ * https://github.com/mjosaarinen/sm4ni
+ */
+
+#include <config.h>
+
+#ifdef __x86_64
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
+
+#include "asm-common-amd64.h"
+
+/* vector registers */
+#define RX0 %xmm0
+#define RX1 %xmm1
+#define MASK_4BIT %xmm2
+#define RTMP0 %xmm3
+#define RTMP1 %xmm4
+#define RTMP2 %xmm5
+#define RTMP3 %xmm6
+#define RTMP4 %xmm7
+
+#define RA0 %xmm8
+#define RA1 %xmm9
+#define RA2 %xmm10
+#define RA3 %xmm11
+
+#define RB0 %xmm12
+#define RB1 %xmm13
+#define RB2 %xmm14
+#define RB3 %xmm15
+
+#define RNOT %xmm0
+#define RBSWAP %xmm1
+
+/**********************************************************************
+ helper macros
+ **********************************************************************/
+
+/* Transpose four 32-bit words between 128-bit vectors. */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+/* pre-SubByte transform. */
+#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpand x, mask4bit, tmp0; \
+ vpandn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
+ * the 'vaesenclast' instruction. */
+#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpandn mask4bit, x, tmp0; \
+ vpsrld $4, x, x; \
+ vpand x, mask4bit, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
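+
+/* Taken together, transform_pre, AESENCLAST and transform_post expose
+ * the SM4 S-box through AES-NI: the pre-affine step maps SM4 field
+ * elements into the AES field, AESENCLAST performs SubBytes (its
+ * ShiftRows effect is undone later by the .Linv_shift_row* shuffles
+ * and its round-key XOR with MASK_4BIT is compensated in
+ * transform_post), and the post-affine step maps the result back.
+ * See the sm4ni reference above. */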
+
+/**********************************************************************
+ 4-way && 8-way SM4 with AES-NI and AVX
+ **********************************************************************/
+
+.text
+.align 16
+
+/*
+ * The following four affine transform look-up tables are from work by
+ * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
+ *
+ * These allow exposing SM4 S-Box from AES SubByte.
+ */
+
+/* pre-SubByte affine transform, from SM4 field to AES field. */
+.Lpre_tf_lo_s:
+ .quad 0x9197E2E474720701, 0xC7C1B4B222245157
+.Lpre_tf_hi_s:
+ .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
+
+/* post-SubByte affine transform, from AES field to SM4 field. */
+.Lpost_tf_lo_s:
+ .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
+.Lpost_tf_hi_s:
+ .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+ .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+ .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_8:
+ .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
+ .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
+
+/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_16:
+ .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
+ .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
+
+/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_24:
+ .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
+ .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/* For input word byte-swap */
+.Lbswap32_mask:
+ .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+ .long 0x0f0f0f0f
+
+.align 8
+.globl _gcry_sm4_aesni_avx_expand_key
+ELF(.type _gcry_sm4_aesni_avx_expand_key,@function;)
+_gcry_sm4_aesni_avx_expand_key:
+ /* input:
+ * %rdi: 128-bit key
+ * %rsi: rkey_enc
+ * %rdx: rkey_dec
+ * %rcx: fk array
+ * %r8: ck array
+ */
+ CFI_STARTPROC();
+
+ vmovd 0*4(%rdi), RA0;
+ vmovd 1*4(%rdi), RA1;
+ vmovd 2*4(%rdi), RA2;
+ vmovd 3*4(%rdi), RA3;
+
+ vmovdqa .Lbswap32_mask rRIP, RTMP2;
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+
+ vmovd 0*4(%rcx), RB0;
+ vmovd 1*4(%rcx), RB1;
+ vmovd 2*4(%rcx), RB2;
+ vmovd 3*4(%rcx), RB3;
+ vpxor RB0, RA0, RA0;
+ vpxor RB1, RA1, RA1;
+ vpxor RB2, RA2, RA2;
+ vpxor RB3, RA3, RA3;
+
+ vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
+ vmovdqa .Lpre_tf_lo_s rRIP, RTMP4;
+ vmovdqa .Lpre_tf_hi_s rRIP, RB0;
+ vmovdqa .Lpost_tf_lo_s rRIP, RB1;
+ vmovdqa .Lpost_tf_hi_s rRIP, RB2;
+ vmovdqa .Linv_shift_row rRIP, RB3;
+
+#define ROUND(round, s0, s1, s2, s3) \
+ vbroadcastss (4*(round))(%r8), RX0; \
+ vpxor s1, RX0, RX0; \
+ vpxor s2, RX0, RX0; \
+ vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+ \
+ /* sbox, non-linear part */ \
+ transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0); \
+ vaesenclast MASK_4BIT, RX0, RX0; \
+ transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \
+ \
+ /* linear part */ \
+ vpshufb RB3, RX0, RX0; \
+ vpxor RX0, s0, s0; /* s0 ^ x */ \
+ vpslld $13, RX0, RTMP0; \
+ vpsrld $19, RX0, RTMP1; \
+ vpslld $23, RX0, RTMP2; \
+ vpsrld $9, RX0, RTMP3; \
+ vpxor RTMP0, RTMP1, RTMP1; \
+ vpxor RTMP2, RTMP3, RTMP3; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,13) */ \
+ vpxor RTMP3, s0, s0; /* s0 ^ x ^ rol(x,13) ^ rol(x,23) */
+
+ leaq (32*4)(%r8), %rax;
+ leaq (32*4)(%rdx), %rdx;
+.align 16
+.Lroundloop_expand_key:
+ leaq (-4*4)(%rdx), %rdx;
+ ROUND(0, RA0, RA1, RA2, RA3);
+ ROUND(1, RA1, RA2, RA3, RA0);
+ ROUND(2, RA2, RA3, RA0, RA1);
+ ROUND(3, RA3, RA0, RA1, RA2);
+ leaq (4*4)(%r8), %r8;
+ vmovd RA0, (0*4)(%rsi);
+ vmovd RA1, (1*4)(%rsi);
+ vmovd RA2, (2*4)(%rsi);
+ vmovd RA3, (3*4)(%rsi);
+ vmovd RA0, (3*4)(%rdx);
+ vmovd RA1, (2*4)(%rdx);
+ vmovd RA2, (1*4)(%rdx);
+ vmovd RA3, (0*4)(%rdx);
+ leaq (4*4)(%rsi), %rsi;
+ cmpq %rax, %r8;
+ jne .Lroundloop_expand_key;
+
+#undef ROUND
+
+ vzeroall;
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_expand_key,.-_gcry_sm4_aesni_avx_expand_key;)
+
+.align 8
+ELF(.type sm4_aesni_avx_crypt_blk1_4,@function;)
+sm4_aesni_avx_crypt_blk1_4:
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (1..4 blocks)
+ * %rdx: src (1..4 blocks)
+ * %rcx: num blocks (1..4)
+ */
+ CFI_STARTPROC();
+
+ vmovdqu 0*16(%rdx), RA0;
+ vmovdqa RA0, RA1;
+ vmovdqa RA0, RA2;
+ vmovdqa RA0, RA3;
+ cmpq $2, %rcx;
+ jb .Lblk4_load_input_done;
+ vmovdqu 1*16(%rdx), RA1;
+ je .Lblk4_load_input_done;
+ vmovdqu 2*16(%rdx), RA2;
+ cmpq $3, %rcx;
+ je .Lblk4_load_input_done;
+ vmovdqu 3*16(%rdx), RA3;
+
+.Lblk4_load_input_done:
+
+ vmovdqa .Lbswap32_mask rRIP, RTMP2;
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+
+ vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
+ vmovdqa .Lpre_tf_lo_s rRIP, RTMP4;
+ vmovdqa .Lpre_tf_hi_s rRIP, RB0;
+ vmovdqa .Lpost_tf_lo_s rRIP, RB1;
+ vmovdqa .Lpost_tf_hi_s rRIP, RB2;
+ vmovdqa .Linv_shift_row rRIP, RB3;
+ vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP2;
+ vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP3;
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3) \
+ vbroadcastss (4*(round))(%rdi), RX0; \
+ vpxor s1, RX0, RX0; \
+ vpxor s2, RX0, RX0; \
+ vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+ \
+ /* sbox, non-linear part */ \
+ transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0); \
+ vaesenclast MASK_4BIT, RX0, RX0; \
+ transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \
+ \
+ /* linear part */ \
+ vpshufb RB3, RX0, RTMP0; \
+ vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+ vpshufb RTMP2, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+ vpshufb RTMP3, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb .Linv_shift_row_rol_24 rRIP, RX0, RTMP1; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP0, RTMP1; \
+ vpsrld $30, RTMP0, RTMP0; \
+ vpxor RTMP0, s0, s0; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */
+
+ leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk4:
+ ROUND(0, RA0, RA1, RA2, RA3);
+ ROUND(1, RA1, RA2, RA3, RA0);
+ ROUND(2, RA2, RA3, RA0, RA1);
+ ROUND(3, RA3, RA0, RA1, RA2);
+ leaq (4*4)(%rdi), %rdi;
+ cmpq %rax, %rdi;
+ jne .Lroundloop_blk4;
+
+#undef ROUND
+
+ vmovdqa .Lbswap128_mask rRIP, RTMP2;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+
+ vmovdqu RA0, 0*16(%rsi);
+ cmpq $2, %rcx;
+ jb .Lblk4_store_output_done;
+ vmovdqu RA1, 1*16(%rsi);
+ je .Lblk4_store_output_done;
+ vmovdqu RA2, 2*16(%rsi);
+ cmpq $3, %rcx;
+ je .Lblk4_store_output_done;
+ vmovdqu RA3, 3*16(%rsi);
+
+.Lblk4_store_output_done:
+ vzeroall;
+ xorl %eax, %eax;
+ ret;
+ CFI_ENDPROC();
+ELF(.size sm4_aesni_avx_crypt_blk1_4,.-sm4_aesni_avx_crypt_blk1_4;)
+
+.align 8
+ELF(.type __sm4_crypt_blk8,@function;)
+__sm4_crypt_blk8:
+ /* input:
+ * %rdi: round key array, CTX
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
+ * ciphertext blocks
+ * output:
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext
+ * blocks
+ */
+ CFI_STARTPROC();
+
+ vmovdqa .Lbswap32_mask rRIP, RTMP2;
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
+ vbroadcastss (4*(round))(%rdi), RX0; \
+ vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; \
+ vmovdqa .Lpre_tf_hi_s rRIP, RTMP1; \
+ vmovdqa RX0, RX1; \
+ vpxor s1, RX0, RX0; \
+ vpxor s2, RX0, RX0; \
+ vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+ vmovdqa .Lpost_tf_lo_s rRIP, RTMP2; \
+ vmovdqa .Lpost_tf_hi_s rRIP, RTMP3; \
+ vpxor r1, RX1, RX1; \
+ vpxor r2, RX1, RX1; \
+ vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
+ \
+ /* sbox, non-linear part */ \
+ transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ vmovdqa .Linv_shift_row rRIP, RTMP4; \
+ vaesenclast MASK_4BIT, RX0, RX0; \
+ vaesenclast MASK_4BIT, RX1, RX1; \
+ transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ \
+ /* linear part */ \
+ vpshufb RTMP4, RX0, RTMP0; \
+ vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+ vpshufb RTMP4, RX1, RTMP2; \
+ vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP4; \
+ vpxor RTMP2, r0, r0; /* r0 ^ x */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vmovdqa .Linv_shift_row_rol_24 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP0, RTMP1; \
+ vpsrld $30, RTMP0, RTMP0; \
+ vpxor RTMP0, s0, s0; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP2, RTMP3; \
+ vpsrld $30, RTMP2, RTMP2; \
+ vpxor RTMP2, r0, r0; \
+ vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */
+
+ leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk8:
+ ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
+ ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
+ ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
+ ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
+ leaq (4*4)(%rdi), %rdi;
+ cmpq %rax, %rdi;
+ jne .Lroundloop_blk8;
+
+#undef ROUND
+
+ vmovdqa .Lbswap128_mask rRIP, RTMP2;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __sm4_crypt_blk8,.-__sm4_crypt_blk8;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_crypt_blk1_8
+ELF(.type _gcry_sm4_aesni_avx_crypt_blk1_8,@function;)
+_gcry_sm4_aesni_avx_crypt_blk1_8:
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (1..8 blocks)
+ * %rdx: src (1..8 blocks)
+ * %rcx: num blocks (1..8)
+ */
+ CFI_STARTPROC();
+
+ cmpq $5, %rcx;
+ jb sm4_aesni_avx_crypt_blk1_4;
+ vmovdqu (0 * 16)(%rdx), RA0;
+ vmovdqu (1 * 16)(%rdx), RA1;
+ vmovdqu (2 * 16)(%rdx), RA2;
+ vmovdqu (3 * 16)(%rdx), RA3;
+ vmovdqu (4 * 16)(%rdx), RB0;
+ vmovdqa RB0, RB1;
+ vmovdqa RB0, RB2;
+ vmovdqa RB0, RB3;
+ je .Lblk8_load_input_done;
+ vmovdqu (5 * 16)(%rdx), RB1;
+ cmpq $7, %rcx;
+ jb .Lblk8_load_input_done;
+ vmovdqu (6 * 16)(%rdx), RB2;
+ je .Lblk8_load_input_done;
+ vmovdqu (7 * 16)(%rdx), RB3;
+
+.Lblk8_load_input_done:
+ call __sm4_crypt_blk8;
+
+ cmpq $6, %rcx;
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ jb .Lblk8_store_output_done;
+ vmovdqu RB1, (5 * 16)(%rsi);
+ je .Lblk8_store_output_done;
+ vmovdqu RB2, (6 * 16)(%rsi);
+ cmpq $7, %rcx;
+ je .Lblk8_store_output_done;
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+.Lblk8_store_output_done:
+ vzeroall;
+ xorl %eax, %eax;
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_crypt_blk1_8,.-_gcry_sm4_aesni_avx_crypt_blk1_8;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_ctr_enc
+ELF(.type _gcry_sm4_aesni_avx_ctr_enc,@function;)
+_gcry_sm4_aesni_avx_ctr_enc:
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ CFI_STARTPROC();
+
+ /* load IV and byteswap */
+ vmovdqu (%rcx), RA0;
+
+ vmovdqa .Lbswap128_mask rRIP, RBSWAP;
+ vpshufb RBSWAP, RA0, RTMP0; /* be => le */
+
+ vpcmpeqd RNOT, RNOT, RNOT;
+ vpsrldq $8, RNOT, RNOT; /* low: -1, high: 0 */
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
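+
+/* inc_le128 adds one to a little-endian 128-bit counter: vpcmpeqq
+ * flags a low quadword that is about to wrap, vpsubq with minus_one
+ * increments the low quadword, and the vpslldq/vpsubq pair carries
+ * the overflow into the high quadword. */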
+
+ /* construct IVs */
+ inc_le128(RTMP0, RNOT, RTMP2); /* +1 */
+ vpshufb RBSWAP, RTMP0, RA1;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +2 */
+ vpshufb RBSWAP, RTMP0, RA2;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +3 */
+ vpshufb RBSWAP, RTMP0, RA3;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +4 */
+ vpshufb RBSWAP, RTMP0, RB0;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +5 */
+ vpshufb RBSWAP, RTMP0, RB1;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +6 */
+ vpshufb RBSWAP, RTMP0, RB2;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +7 */
+ vpshufb RBSWAP, RTMP0, RB3;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +8 */
+ vpshufb RBSWAP, RTMP0, RTMP1;
+
+ /* store new IV */
+ vmovdqu RTMP1, (%rcx);
+
+ call __sm4_crypt_blk8;
+
+ vpxor (0 * 16)(%rdx), RA0, RA0;
+ vpxor (1 * 16)(%rdx), RA1, RA1;
+ vpxor (2 * 16)(%rdx), RA2, RA2;
+ vpxor (3 * 16)(%rdx), RA3, RA3;
+ vpxor (4 * 16)(%rdx), RB0, RB0;
+ vpxor (5 * 16)(%rdx), RB1, RB1;
+ vpxor (6 * 16)(%rdx), RB2, RB2;
+ vpxor (7 * 16)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_ctr_enc,.-_gcry_sm4_aesni_avx_ctr_enc;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_cbc_dec
+ELF(.type _gcry_sm4_aesni_avx_cbc_dec,@function;)
+_gcry_sm4_aesni_avx_cbc_dec:
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ vmovdqu (0 * 16)(%rdx), RA0;
+ vmovdqu (1 * 16)(%rdx), RA1;
+ vmovdqu (2 * 16)(%rdx), RA2;
+ vmovdqu (3 * 16)(%rdx), RA3;
+ vmovdqu (4 * 16)(%rdx), RB0;
+ vmovdqu (5 * 16)(%rdx), RB1;
+ vmovdqu (6 * 16)(%rdx), RB2;
+ vmovdqu (7 * 16)(%rdx), RB3;
+
+ call __sm4_crypt_blk8;
+
+ vmovdqu (7 * 16)(%rdx), RNOT;
+ vpxor (%rcx), RA0, RA0;
+ vpxor (0 * 16)(%rdx), RA1, RA1;
+ vpxor (1 * 16)(%rdx), RA2, RA2;
+ vpxor (2 * 16)(%rdx), RA3, RA3;
+ vpxor (3 * 16)(%rdx), RB0, RB0;
+ vpxor (4 * 16)(%rdx), RB1, RB1;
+ vpxor (5 * 16)(%rdx), RB2, RB2;
+ vpxor (6 * 16)(%rdx), RB3, RB3;
+ vmovdqu RNOT, (%rcx); /* store new IV */
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_cbc_dec,.-_gcry_sm4_aesni_avx_cbc_dec;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_cfb_dec
+ELF(.type _gcry_sm4_aesni_avx_cfb_dec,@function;)
+_gcry_sm4_aesni_avx_cfb_dec:
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ /* Load input */
+ vmovdqu (%rcx), RA0;
+ vmovdqu 0 * 16(%rdx), RA1;
+ vmovdqu 1 * 16(%rdx), RA2;
+ vmovdqu 2 * 16(%rdx), RA3;
+ vmovdqu 3 * 16(%rdx), RB0;
+ vmovdqu 4 * 16(%rdx), RB1;
+ vmovdqu 5 * 16(%rdx), RB2;
+ vmovdqu 6 * 16(%rdx), RB3;
+
+ /* Update IV */
+ vmovdqu 7 * 16(%rdx), RNOT;
+ vmovdqu RNOT, (%rcx);
+
+ call __sm4_crypt_blk8;
+
+ vpxor (0 * 16)(%rdx), RA0, RA0;
+ vpxor (1 * 16)(%rdx), RA1, RA1;
+ vpxor (2 * 16)(%rdx), RA2, RA2;
+ vpxor (3 * 16)(%rdx), RA3, RA3;
+ vpxor (4 * 16)(%rdx), RB0, RB0;
+ vpxor (5 * 16)(%rdx), RB1, RB1;
+ vpxor (6 * 16)(%rdx), RB2, RB2;
+ vpxor (7 * 16)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_cfb_dec,.-_gcry_sm4_aesni_avx_cfb_dec;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_ocb_enc
+ELF(.type _gcry_sm4_aesni_avx_ocb_enc,@function;)
+
+_gcry_sm4_aesni_avx_ocb_enc:
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[8])
+ */
+ CFI_STARTPROC();
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ vmovdqu (%rcx), RTMP0;
+ vmovdqu (%r8), RTMP1;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+
+#define OCB_INPUT(n, lreg, xreg) \
+ vmovdqu (n * 16)(%rdx), xreg; \
+ vpxor (lreg), RTMP0, RTMP0; \
+ vpxor xreg, RTMP1, RTMP1; \
+ vpxor RTMP0, xreg, xreg; \
+ vmovdqu RTMP0, (n * 16)(%rsi);
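+/* Each OCB_INPUT step advances the running offset by L_{ntz(i)}, folds
+ * the plaintext block into the checksum, whitens the block with the new
+ * offset and parks that offset in the destination buffer, where it is
+ * XORed back onto the cipher output after __sm4_crypt_blk8 returns. */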
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, RA0);
+ OCB_INPUT(1, %r11, RA1);
+ OCB_INPUT(2, %r12, RA2);
+ OCB_INPUT(3, %r13, RA3);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, RB0);
+ OCB_INPUT(5, %r11, RB1);
+ OCB_INPUT(6, %r12, RB2);
+ OCB_INPUT(7, %r13, RB3);
+#undef OCB_INPUT
+
+ vmovdqu RTMP0, (%rcx);
+ vmovdqu RTMP1, (%r8);
+
+ movq (0 * 8)(%rsp), %r10;
+ CFI_RESTORE(%r10);
+ movq (1 * 8)(%rsp), %r11;
+ CFI_RESTORE(%r11);
+ movq (2 * 8)(%rsp), %r12;
+ CFI_RESTORE(%r12);
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r13);
+
+ call __sm4_crypt_blk8;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ vpxor (0 * 16)(%rsi), RA0, RA0;
+ vpxor (1 * 16)(%rsi), RA1, RA1;
+ vpxor (2 * 16)(%rsi), RA2, RA2;
+ vpxor (3 * 16)(%rsi), RA3, RA3;
+ vpxor (4 * 16)(%rsi), RB0, RB0;
+ vpxor (5 * 16)(%rsi), RB1, RB1;
+ vpxor (6 * 16)(%rsi), RB2, RB2;
+ vpxor (7 * 16)(%rsi), RB3, RB3;
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_ocb_enc,.-_gcry_sm4_aesni_avx_ocb_enc;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_ocb_dec
+ELF(.type _gcry_sm4_aesni_avx_ocb_dec,@function;)
+
+_gcry_sm4_aesni_avx_ocb_dec:
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[8])
+ */
+ CFI_STARTPROC();
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+	vmovdqu (%rcx), RTMP0;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+
+#define OCB_INPUT(n, lreg, xreg) \
+ vmovdqu (n * 16)(%rdx), xreg; \
+ vpxor (lreg), RTMP0, RTMP0; \
+ vpxor RTMP0, xreg, xreg; \
+ vmovdqu RTMP0, (n * 16)(%rsi);
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, RA0);
+ OCB_INPUT(1, %r11, RA1);
+ OCB_INPUT(2, %r12, RA2);
+ OCB_INPUT(3, %r13, RA3);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, RB0);
+ OCB_INPUT(5, %r11, RB1);
+ OCB_INPUT(6, %r12, RB2);
+ OCB_INPUT(7, %r13, RB3);
+#undef OCB_INPUT
+
+ vmovdqu RTMP0, (%rcx);
+
+ movq (0 * 8)(%rsp), %r10;
+ CFI_RESTORE(%r10);
+ movq (1 * 8)(%rsp), %r11;
+ CFI_RESTORE(%r11);
+ movq (2 * 8)(%rsp), %r12;
+ CFI_RESTORE(%r12);
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r13);
+
+ call __sm4_crypt_blk8;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ vmovdqu (%r8), RTMP0;
+
+ vpxor (0 * 16)(%rsi), RA0, RA0;
+ vpxor (1 * 16)(%rsi), RA1, RA1;
+ vpxor (2 * 16)(%rsi), RA2, RA2;
+ vpxor (3 * 16)(%rsi), RA3, RA3;
+ vpxor (4 * 16)(%rsi), RB0, RB0;
+ vpxor (5 * 16)(%rsi), RB1, RB1;
+ vpxor (6 * 16)(%rsi), RB2, RB2;
+ vpxor (7 * 16)(%rsi), RB3, RB3;
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vpxor RA0, RTMP0, RTMP0;
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vpxor RA1, RTMP0, RTMP0;
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vpxor RA2, RTMP0, RTMP0;
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vpxor RA3, RTMP0, RTMP0;
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vpxor RB0, RTMP0, RTMP0;
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vpxor RB1, RTMP0, RTMP0;
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vpxor RB2, RTMP0, RTMP0;
+ vmovdqu RB3, (7 * 16)(%rsi);
+ vpxor RB3, RTMP0, RTMP0;
+
+ vmovdqu RTMP0, (%r8);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_ocb_dec,.-_gcry_sm4_aesni_avx_ocb_dec;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_ocb_auth
+ELF(.type _gcry_sm4_aesni_avx_ocb_auth,@function;)
+
+_gcry_sm4_aesni_avx_ocb_auth:
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: abuf (8 blocks)
+ * %rdx: offset
+ * %rcx: checksum
+ * %r8 : L pointers (void *L[8])
+ */
+ CFI_STARTPROC();
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ vmovdqu (%rdx), RTMP0;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+
+#define OCB_INPUT(n, lreg, xreg) \
+ vmovdqu (n * 16)(%rsi), xreg; \
+ vpxor (lreg), RTMP0, RTMP0; \
+ vpxor RTMP0, xreg, xreg;
+ movq (0 * 8)(%r8), %r10;
+ movq (1 * 8)(%r8), %r11;
+ movq (2 * 8)(%r8), %r12;
+ movq (3 * 8)(%r8), %r13;
+ OCB_INPUT(0, %r10, RA0);
+ OCB_INPUT(1, %r11, RA1);
+ OCB_INPUT(2, %r12, RA2);
+ OCB_INPUT(3, %r13, RA3);
+ movq (4 * 8)(%r8), %r10;
+ movq (5 * 8)(%r8), %r11;
+ movq (6 * 8)(%r8), %r12;
+ movq (7 * 8)(%r8), %r13;
+ OCB_INPUT(4, %r10, RB0);
+ OCB_INPUT(5, %r11, RB1);
+ OCB_INPUT(6, %r12, RB2);
+ OCB_INPUT(7, %r13, RB3);
+#undef OCB_INPUT
+
+ vmovdqu RTMP0, (%rdx);
+
+ movq (0 * 8)(%rsp), %r10;
+ CFI_RESTORE(%r10);
+ movq (1 * 8)(%rsp), %r11;
+ CFI_RESTORE(%r11);
+ movq (2 * 8)(%rsp), %r12;
+ CFI_RESTORE(%r12);
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r13);
+
+ call __sm4_crypt_blk8;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ vmovdqu (%rcx), RTMP0;
+ vpxor RB0, RA0, RA0;
+ vpxor RB1, RA1, RA1;
+ vpxor RB2, RA2, RA2;
+ vpxor RB3, RA3, RA3;
+
+ vpxor RTMP0, RA3, RA3;
+ vpxor RA2, RA0, RA0;
+ vpxor RA3, RA1, RA1;
+
+ vpxor RA1, RA0, RA0;
+ vmovdqu RA0, (%rcx);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_ocb_auth,.-_gcry_sm4_aesni_avx_ocb_auth;)
+
+#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/sm4-aesni-avx2-amd64.S b/comm/third_party/libgcrypt/cipher/sm4-aesni-avx2-amd64.S
new file mode 100644
index 0000000000..6e46c0dca8
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sm4-aesni-avx2-amd64.S
@@ -0,0 +1,851 @@
+/* sm4-aesni-avx2-amd64.S - AES-NI/AVX2 implementation of SM4 cipher
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Based on SM4 AES-NI work by Markku-Juhani O. Saarinen at:
+ * https://github.com/mjosaarinen/sm4ni
+ */
+
+#include <config.h>
+
+#ifdef __x86_64
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
+
+#include "asm-common-amd64.h"
+
+/* vector registers */
+#define RX0 %ymm0
+#define RX1 %ymm1
+#define MASK_4BIT %ymm2
+#define RTMP0 %ymm3
+#define RTMP1 %ymm4
+#define RTMP2 %ymm5
+#define RTMP3 %ymm6
+#define RTMP4 %ymm7
+
+#define RA0 %ymm8
+#define RA1 %ymm9
+#define RA2 %ymm10
+#define RA3 %ymm11
+
+#define RB0 %ymm12
+#define RB1 %ymm13
+#define RB2 %ymm14
+#define RB3 %ymm15
+
+#define RNOT %ymm0
+#define RBSWAP %ymm1
+
+#define RX0x %xmm0
+#define RX1x %xmm1
+#define MASK_4BITx %xmm2
+
+#define RNOTx %xmm0
+#define RBSWAPx %xmm1
+
+#define RTMP0x %xmm3
+#define RTMP1x %xmm4
+#define RTMP2x %xmm5
+#define RTMP3x %xmm6
+#define RTMP4x %xmm7
+
+/**********************************************************************
+ helper macros
+ **********************************************************************/
+
+/* Transpose four 32-bit words between 128-bit vector lanes. */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+/* pre-SubByte transform. */
+#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpand x, mask4bit, tmp0; \
+ vpandn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
+ * the 'vaesenclast' instruction. */
+#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpandn mask4bit, x, tmp0; \
+ vpsrld $4, x, x; \
+ vpand x, mask4bit, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+/**********************************************************************
+  16-way SM4 with AES-NI and AVX2
+ **********************************************************************/
+
+.text
+.align 16
+
+/*
+ * The following four affine transform look-up tables are from work by
+ * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
+ *
+ * These allow exposing SM4 S-Box from AES SubByte.
+ */
+
+/* pre-SubByte affine transform, from SM4 field to AES field. */
+.Lpre_tf_lo_s:
+ .quad 0x9197E2E474720701, 0xC7C1B4B222245157
+.Lpre_tf_hi_s:
+ .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
+
+/* post-SubByte affine transform, from AES field to SM4 field. */
+.Lpost_tf_lo_s:
+ .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
+.Lpost_tf_hi_s:
+ .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+ .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+ .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_8:
+ .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
+ .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
+
+/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_16:
+ .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
+ .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
+
+/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_24:
+ .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
+ .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/* For input word byte-swap */
+.Lbswap32_mask:
+ .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+ .long 0x0f0f0f0f
+
+.align 8
+ELF(.type __sm4_crypt_blk16,@function;)
+__sm4_crypt_blk16:
+ /* input:
+ * %rdi: ctx, CTX
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+ * plaintext blocks
+ * output:
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+ * ciphertext blocks
+ */
+ CFI_STARTPROC();
+
+ vbroadcasti128 .Lbswap32_mask rRIP, RTMP2;
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT;
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
+ vpbroadcastd (4*(round))(%rdi), RX0; \
+ vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \
+ vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \
+ vmovdqa RX0, RX1; \
+ vpxor s1, RX0, RX0; \
+ vpxor s2, RX0, RX0; \
+ vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+ vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \
+ vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \
+ vpxor r1, RX1, RX1; \
+ vpxor r2, RX1, RX1; \
+ vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
+ \
+ /* sbox, non-linear part */ \
+ transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ vextracti128 $1, RX0, RTMP4x; \
+ vextracti128 $1, RX1, RTMP0x; \
+ vaesenclast MASK_4BITx, RX0x, RX0x; \
+ vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \
+ vaesenclast MASK_4BITx, RX1x, RX1x; \
+ vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \
+ vinserti128 $1, RTMP4x, RX0, RX0; \
+ vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \
+ vinserti128 $1, RTMP0x, RX1, RX1; \
+ transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ \
+ /* linear part */ \
+ vpshufb RTMP4, RX0, RTMP0; \
+ vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+ vpshufb RTMP4, RX1, RTMP2; \
+ vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \
+ vpxor RTMP2, r0, r0; /* r0 ^ x */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP0, RTMP1; \
+ vpsrld $30, RTMP0, RTMP0; \
+ vpxor RTMP0, s0, s0; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP2, RTMP3; \
+ vpsrld $30, RTMP2, RTMP2; \
+ vpxor RTMP2, r0, r0; \
+ vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */
+
+ leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk8:
+ ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
+ ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
+ ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
+ ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
+ leaq (4*4)(%rdi), %rdi;
+ cmpq %rax, %rdi;
+ jne .Lroundloop_blk8;
+
+#undef ROUND
+
+ vbroadcasti128 .Lbswap128_mask rRIP, RTMP2;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __sm4_crypt_blk16,.-__sm4_crypt_blk16;)
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
+
+.align 8
+.globl _gcry_sm4_aesni_avx2_ctr_enc
+ELF(.type _gcry_sm4_aesni_avx2_ctr_enc,@function;)
+_gcry_sm4_aesni_avx2_ctr_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ CFI_STARTPROC();
+
+ movq 8(%rcx), %rax;
+ bswapq %rax;
+
+ vzeroupper;
+
+ vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
+ vpcmpeqd RNOT, RNOT, RNOT;
+ vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */
+ vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
+
+ /* load IV and byteswap */
+ vmovdqu (%rcx), RTMP4x;
+ vpshufb RTMP3x, RTMP4x, RTMP4x;
+ vmovdqa RTMP4x, RTMP0x;
+ inc_le128(RTMP4x, RNOTx, RTMP1x);
+ vinserti128 $1, RTMP4x, RTMP0, RTMP0;
+ vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
+
+ /* check need for handling 64-bit overflow and carry */
+ cmpq $(0xffffffffffffffff - 16), %rax;
+ ja .Lhandle_ctr_carry;
+
+ /* construct IVs */
+ vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
+ vpshufb RTMP3, RTMP0, RA1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
+ vpshufb RTMP3, RTMP0, RA2;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
+ vpshufb RTMP3, RTMP0, RA3;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
+ vpshufb RTMP3, RTMP0, RB0;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
+ vpshufb RTMP3, RTMP0, RB1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
+ vpshufb RTMP3, RTMP0, RB2;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
+ vpshufb RTMP3, RTMP0, RB3;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
+ vpshufb RTMP3x, RTMP0x, RTMP0x;
+
+ jmp .Lctr_carry_done;
+
+.Lhandle_ctr_carry:
+ /* construct IVs */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vextracti128 $1, RTMP0, RTMP0x;
+ vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */
+
+.align 4
+.Lctr_carry_done:
+ /* store new IV */
+ vmovdqu RTMP0x, (%rcx);
+
+ call __sm4_crypt_blk16;
+
+ vpxor (0 * 32)(%rdx), RA0, RA0;
+ vpxor (1 * 32)(%rdx), RA1, RA1;
+ vpxor (2 * 32)(%rdx), RA2, RA2;
+ vpxor (3 * 32)(%rdx), RA3, RA3;
+ vpxor (4 * 32)(%rdx), RB0, RB0;
+ vpxor (5 * 32)(%rdx), RB1, RB1;
+ vpxor (6 * 32)(%rdx), RB2, RB2;
+ vpxor (7 * 32)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx2_ctr_enc,.-_gcry_sm4_aesni_avx2_ctr_enc;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx2_cbc_dec
+ELF(.type _gcry_sm4_aesni_avx2_cbc_dec,@function;)
+_gcry_sm4_aesni_avx2_cbc_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ vmovdqu (0 * 32)(%rdx), RA0;
+ vmovdqu (1 * 32)(%rdx), RA1;
+ vmovdqu (2 * 32)(%rdx), RA2;
+ vmovdqu (3 * 32)(%rdx), RA3;
+ vmovdqu (4 * 32)(%rdx), RB0;
+ vmovdqu (5 * 32)(%rdx), RB1;
+ vmovdqu (6 * 32)(%rdx), RB2;
+ vmovdqu (7 * 32)(%rdx), RB3;
+
+ call __sm4_crypt_blk16;
+
+ vmovdqu (%rcx), RNOTx;
+ vinserti128 $1, (%rdx), RNOT, RNOT;
+ vpxor RNOT, RA0, RA0;
+ vpxor (0 * 32 + 16)(%rdx), RA1, RA1;
+ vpxor (1 * 32 + 16)(%rdx), RA2, RA2;
+ vpxor (2 * 32 + 16)(%rdx), RA3, RA3;
+ vpxor (3 * 32 + 16)(%rdx), RB0, RB0;
+ vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
+ vpxor (5 * 32 + 16)(%rdx), RB2, RB2;
+ vpxor (6 * 32 + 16)(%rdx), RB3, RB3;
+ vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+ vmovdqu RNOTx, (%rcx); /* store new IV */
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx2_cbc_dec,.-_gcry_sm4_aesni_avx2_cbc_dec;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx2_cfb_dec
+ELF(.type _gcry_sm4_aesni_avx2_cfb_dec,@function;)
+_gcry_sm4_aesni_avx2_cfb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ /* Load input */
+ vmovdqu (%rcx), RNOTx;
+ vinserti128 $1, (%rdx), RNOT, RA0;
+ vmovdqu (0 * 32 + 16)(%rdx), RA1;
+ vmovdqu (1 * 32 + 16)(%rdx), RA2;
+ vmovdqu (2 * 32 + 16)(%rdx), RA3;
+ vmovdqu (3 * 32 + 16)(%rdx), RB0;
+ vmovdqu (4 * 32 + 16)(%rdx), RB1;
+ vmovdqu (5 * 32 + 16)(%rdx), RB2;
+ vmovdqu (6 * 32 + 16)(%rdx), RB3;
+
+ /* Update IV */
+ vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+ vmovdqu RNOTx, (%rcx);
+
+ call __sm4_crypt_blk16;
+
+ vpxor (0 * 32)(%rdx), RA0, RA0;
+ vpxor (1 * 32)(%rdx), RA1, RA1;
+ vpxor (2 * 32)(%rdx), RA2, RA2;
+ vpxor (3 * 32)(%rdx), RA3, RA3;
+ vpxor (4 * 32)(%rdx), RB0, RB0;
+ vpxor (5 * 32)(%rdx), RB1, RB1;
+ vpxor (6 * 32)(%rdx), RB2, RB2;
+ vpxor (7 * 32)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx2_cfb_dec,.-_gcry_sm4_aesni_avx2_cfb_dec;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx2_ocb_enc
+ELF(.type _gcry_sm4_aesni_avx2_ocb_enc,@function;)
+
+_gcry_sm4_aesni_avx2_ocb_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[16])
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ vmovdqu (%rcx), RTMP0x;
+ vmovdqu (%r8), RTMP1x;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+ vmovdqu (n * 32)(%rdx), yreg; \
+ vpxor (l0reg), RTMP0x, RNOTx; \
+ vpxor (l1reg), RNOTx, RTMP0x; \
+ vinserti128 $1, RTMP0x, RNOT, RNOT; \
+ vpxor yreg, RTMP1, RTMP1; \
+ vpxor yreg, RNOT, yreg; \
+ vmovdqu RNOT, (n * 32)(%rsi);
+
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, %r11, RA0);
+ OCB_INPUT(1, %r12, %r13, RA1);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(2, %r10, %r11, RA2);
+ OCB_INPUT(3, %r12, %r13, RA3);
+ movq (8 * 8)(%r9), %r10;
+ movq (9 * 8)(%r9), %r11;
+ movq (10 * 8)(%r9), %r12;
+ movq (11 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, %r11, RB0);
+ OCB_INPUT(5, %r12, %r13, RB1);
+ movq (12 * 8)(%r9), %r10;
+ movq (13 * 8)(%r9), %r11;
+ movq (14 * 8)(%r9), %r12;
+ movq (15 * 8)(%r9), %r13;
+ OCB_INPUT(6, %r10, %r11, RB2);
+ OCB_INPUT(7, %r12, %r13, RB3);
+#undef OCB_INPUT
+
+ vextracti128 $1, RTMP1, RNOTx;
+ vmovdqu RTMP0x, (%rcx);
+ vpxor RNOTx, RTMP1x, RTMP1x;
+ vmovdqu RTMP1x, (%r8);
+
+ movq (0 * 8)(%rsp), %r10;
+ movq (1 * 8)(%rsp), %r11;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ call __sm4_crypt_blk16;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ vpxor (0 * 32)(%rsi), RA0, RA0;
+ vpxor (1 * 32)(%rsi), RA1, RA1;
+ vpxor (2 * 32)(%rsi), RA2, RA2;
+ vpxor (3 * 32)(%rsi), RA3, RA3;
+ vpxor (4 * 32)(%rsi), RB0, RB0;
+ vpxor (5 * 32)(%rsi), RB1, RB1;
+ vpxor (6 * 32)(%rsi), RB2, RB2;
+ vpxor (7 * 32)(%rsi), RB3, RB3;
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx2_ocb_enc,.-_gcry_sm4_aesni_avx2_ocb_enc;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx2_ocb_dec
+ELF(.type _gcry_sm4_aesni_avx2_ocb_dec,@function;)
+
+_gcry_sm4_aesni_avx2_ocb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[16])
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ vmovdqu (%rcx), RTMP0x;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+ vmovdqu (n * 32)(%rdx), yreg; \
+ vpxor (l0reg), RTMP0x, RNOTx; \
+ vpxor (l1reg), RNOTx, RTMP0x; \
+ vinserti128 $1, RTMP0x, RNOT, RNOT; \
+ vpxor yreg, RNOT, yreg; \
+ vmovdqu RNOT, (n * 32)(%rsi);
+
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, %r11, RA0);
+ OCB_INPUT(1, %r12, %r13, RA1);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(2, %r10, %r11, RA2);
+ OCB_INPUT(3, %r12, %r13, RA3);
+ movq (8 * 8)(%r9), %r10;
+ movq (9 * 8)(%r9), %r11;
+ movq (10 * 8)(%r9), %r12;
+ movq (11 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, %r11, RB0);
+ OCB_INPUT(5, %r12, %r13, RB1);
+ movq (12 * 8)(%r9), %r10;
+ movq (13 * 8)(%r9), %r11;
+ movq (14 * 8)(%r9), %r12;
+ movq (15 * 8)(%r9), %r13;
+ OCB_INPUT(6, %r10, %r11, RB2);
+ OCB_INPUT(7, %r12, %r13, RB3);
+#undef OCB_INPUT
+
+ vmovdqu RTMP0x, (%rcx);
+
+ movq (0 * 8)(%rsp), %r10;
+ movq (1 * 8)(%rsp), %r11;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ call __sm4_crypt_blk16;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ vmovdqu (%r8), RTMP1x;
+
+ vpxor (0 * 32)(%rsi), RA0, RA0;
+ vpxor (1 * 32)(%rsi), RA1, RA1;
+ vpxor (2 * 32)(%rsi), RA2, RA2;
+ vpxor (3 * 32)(%rsi), RA3, RA3;
+ vpxor (4 * 32)(%rsi), RB0, RB0;
+ vpxor (5 * 32)(%rsi), RB1, RB1;
+ vpxor (6 * 32)(%rsi), RB2, RB2;
+ vpxor (7 * 32)(%rsi), RB3, RB3;
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vpxor RA0, RTMP1, RTMP1;
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vpxor RA1, RTMP1, RTMP1;
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vpxor RA2, RTMP1, RTMP1;
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vpxor RA3, RTMP1, RTMP1;
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vpxor RB0, RTMP1, RTMP1;
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vpxor RB1, RTMP1, RTMP1;
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vpxor RB2, RTMP1, RTMP1;
+ vmovdqu RB3, (7 * 32)(%rsi);
+ vpxor RB3, RTMP1, RTMP1;
+
+ vextracti128 $1, RTMP1, RNOTx;
+ vpxor RNOTx, RTMP1x, RTMP1x;
+ vmovdqu RTMP1x, (%r8);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx2_ocb_dec,.-_gcry_sm4_aesni_avx2_ocb_dec;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx2_ocb_auth
+ELF(.type _gcry_sm4_aesni_avx2_ocb_auth,@function;)
+
+_gcry_sm4_aesni_avx2_ocb_auth:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: abuf (16 blocks)
+ * %rdx: offset
+ * %rcx: checksum
+ * %r8 : L pointers (void *L[16])
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ vmovdqu (%rdx), RTMP0x;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+ vmovdqu (n * 32)(%rsi), yreg; \
+ vpxor (l0reg), RTMP0x, RNOTx; \
+ vpxor (l1reg), RNOTx, RTMP0x; \
+ vinserti128 $1, RTMP0x, RNOT, RNOT; \
+ vpxor yreg, RNOT, yreg;
+
+ movq (0 * 8)(%r8), %r10;
+ movq (1 * 8)(%r8), %r11;
+ movq (2 * 8)(%r8), %r12;
+ movq (3 * 8)(%r8), %r13;
+ OCB_INPUT(0, %r10, %r11, RA0);
+ OCB_INPUT(1, %r12, %r13, RA1);
+ movq (4 * 8)(%r8), %r10;
+ movq (5 * 8)(%r8), %r11;
+ movq (6 * 8)(%r8), %r12;
+ movq (7 * 8)(%r8), %r13;
+ OCB_INPUT(2, %r10, %r11, RA2);
+ OCB_INPUT(3, %r12, %r13, RA3);
+ movq (8 * 8)(%r8), %r10;
+ movq (9 * 8)(%r8), %r11;
+ movq (10 * 8)(%r8), %r12;
+ movq (11 * 8)(%r8), %r13;
+ OCB_INPUT(4, %r10, %r11, RB0);
+ OCB_INPUT(5, %r12, %r13, RB1);
+ movq (12 * 8)(%r8), %r10;
+ movq (13 * 8)(%r8), %r11;
+ movq (14 * 8)(%r8), %r12;
+ movq (15 * 8)(%r8), %r13;
+ OCB_INPUT(6, %r10, %r11, RB2);
+ OCB_INPUT(7, %r12, %r13, RB3);
+#undef OCB_INPUT
+
+ vmovdqu RTMP0x, (%rdx);
+
+ movq (0 * 8)(%rsp), %r10;
+ movq (1 * 8)(%rsp), %r11;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ call __sm4_crypt_blk16;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ vpxor RA0, RB0, RA0;
+ vpxor RA1, RB1, RA1;
+ vpxor RA2, RB2, RA2;
+ vpxor RA3, RB3, RA3;
+
+ vpxor RA1, RA0, RA0;
+ vpxor RA3, RA2, RA2;
+
+ vpxor RA2, RA0, RTMP1;
+
+ vextracti128 $1, RTMP1, RNOTx;
+ vpxor (%rcx), RTMP1x, RTMP1x;
+ vpxor RNOTx, RTMP1x, RTMP1x;
+ vmovdqu RTMP1x, (%rcx);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx2_ocb_auth,.-_gcry_sm4_aesni_avx2_ocb_auth;)
+
+#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/sm4.c b/comm/third_party/libgcrypt/cipher/sm4.c
new file mode 100644
index 0000000000..c8dd0406e1
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/sm4.c
@@ -0,0 +1,1251 @@
+/* sm4.c - SM4 Cipher Algorithm
+ * Copyright (C) 2020 Alibaba Group.
+ * Copyright (C) 2020 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "types.h" /* for byte and u32 typedefs */
+#include "bithelp.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+/* Helper macro to force alignment to 64 bytes. */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_64 __attribute__ ((aligned (64)))
+#else
+# define ATTR_ALIGNED_64
+#endif
+
+/* USE_AESNI_AVX indicates whether to compile with Intel AES-NI/AVX code. */
+#undef USE_AESNI_AVX
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AESNI_AVX 1
+# endif
+#endif
+
+/* USE_AESNI_AVX2 indicates whether to compile with Intel AES-NI/AVX2 code. */
+#undef USE_AESNI_AVX2
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AESNI_AVX2 1
+# endif
+#endif
+
+/* Assembly implementations use the SysV ABI; on Win64 an ABI conversion and
+ * additional stack space to store XMM6-XMM15 are needed. */
+#undef ASM_FUNC_ABI
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# else
+# define ASM_FUNC_ABI
+# endif
+#endif
+
+static const char *sm4_selftest (void);
+
+static void _gcry_sm4_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static void _gcry_sm4_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static void _gcry_sm4_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static size_t _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
+static size_t _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks);
+
+typedef struct
+{
+ u32 rkey_enc[32];
+ u32 rkey_dec[32];
+#ifdef USE_AESNI_AVX
+ unsigned int use_aesni_avx:1;
+#endif
+#ifdef USE_AESNI_AVX2
+ unsigned int use_aesni_avx2:1;
+#endif
+} SM4_context;
+
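+/* The fixed system parameter FK from the SM4 specification; it is XORed into
+ * the key words before the key schedule starts. */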
+static const u32 fk[4] =
+{
+ 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+};
+
+static struct
+{
+ volatile u32 counter_head;
+ u32 cacheline_align[64 / 4 - 1];
+ byte S[256];
+ volatile u32 counter_tail;
+} sbox_table ATTR_ALIGNED_64 =
+ {
+ 0,
+ { 0, },
+ {
+ 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+ 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+ 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+ 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+ 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
+ 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
+ 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
+ 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
+ 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
+ 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
+ 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
+ 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
+ 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
+ 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
+ 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
+ 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
+ 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
+ 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
+ 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
+ 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
+ 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
+ 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
+ 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
+ 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
+ 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
+ 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
+ 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
+ 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
+ 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
+ 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
+ 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
+ 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
+ },
+ 0
+ };
+
+static const u32 ck[] =
+{
+ 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+ 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+ 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+ 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+ 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+ 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+ 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+ 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
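+/* Each CK constant above is derived, not arbitrary: byte j of word i equals
+ * (4*i + j) * 7 mod 256, packed big-endian.  A hypothetical generator, shown
+ * only as a sketch and not used at runtime:
+ *
+ *   static void gen_ck (u32 ck_out[32])
+ *   {
+ *     int i;
+ *     for (i = 0; i < 32; i++)
+ *       ck_out[i] = (u32)(((4*i + 0) * 7 % 256) << 24
+ *                       | ((4*i + 1) * 7 % 256) << 16
+ *                       | ((4*i + 2) * 7 % 256) << 8
+ *                       | ((4*i + 3) * 7 % 256));
+ *   }
+ */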
+
+#ifdef USE_AESNI_AVX
+extern void _gcry_sm4_aesni_avx_expand_key(const byte *key, u32 *rk_enc,
+ u32 *rk_dec, const u32 *fk,
+ const u32 *ck) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx_ctr_enc(const u32 *rk_enc, byte *out,
+ const byte *in, byte *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx_cbc_dec(const u32 *rk_dec, byte *out,
+ const byte *in, byte *iv) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx_cfb_dec(const u32 *rk_enc, byte *out,
+ const byte *in, byte *iv) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx_ocb_enc(const u32 *rk_enc,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[8]) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx_ocb_dec(const u32 *rk_dec,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[8]) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx_ocb_auth(const u32 *rk_enc,
+ const unsigned char *abuf,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[8]) ASM_FUNC_ABI;
+
+extern unsigned int
+_gcry_sm4_aesni_avx_crypt_blk1_8(const u32 *rk, byte *out, const byte *in,
+ unsigned int num_blks) ASM_FUNC_ABI;
+
+static inline unsigned int
+sm4_aesni_avx_crypt_blk1_8(const u32 *rk, byte *out, const byte *in,
+ unsigned int num_blks)
+{
+ return _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, num_blks);
+}
+
+#endif /* USE_AESNI_AVX */
+
+#ifdef USE_AESNI_AVX2
+extern void _gcry_sm4_aesni_avx2_ctr_enc(const u32 *rk_enc, byte *out,
+ const byte *in,
+ byte *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx2_cbc_dec(const u32 *rk_dec, byte *out,
+ const byte *in,
+ byte *iv) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx2_cfb_dec(const u32 *rk_enc, byte *out,
+ const byte *in,
+ byte *iv) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx2_ocb_enc(const u32 *rk_enc,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx2_ocb_dec(const u32 *rk_dec,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx2_ocb_auth(const u32 *rk_enc,
+ const unsigned char *abuf,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
+#endif /* USE_AESNI_AVX2 */
+
+static inline void prefetch_sbox_table(void)
+{
+ const volatile byte *vtab = (void *)&sbox_table;
+
+  /* Modify the counters to trigger copy-on-write and unsharing if physical
+   * pages of the look-up table are shared between processes.  Modifying the
+   * counters also changes the page checksums, hinting to the same-page
+   * merging algorithm that these pages change frequently. */
+ sbox_table.counter_head++;
+ sbox_table.counter_tail++;
+
+ /* Prefetch look-up table to cache. */
+ (void)vtab[0 * 32];
+ (void)vtab[1 * 32];
+ (void)vtab[2 * 32];
+ (void)vtab[3 * 32];
+ (void)vtab[4 * 32];
+ (void)vtab[5 * 32];
+ (void)vtab[6 * 32];
+ (void)vtab[7 * 32];
+ (void)vtab[8 * 32 - 1];
+}
+
+static inline u32 sm4_t_non_lin_sub(u32 x)
+{
+ u32 out;
+
+ out = (u32)sbox_table.S[(x >> 0) & 0xff] << 0;
+ out |= (u32)sbox_table.S[(x >> 8) & 0xff] << 8;
+ out |= (u32)sbox_table.S[(x >> 16) & 0xff] << 16;
+ out |= (u32)sbox_table.S[(x >> 24) & 0xff] << 24;
+
+ return out;
+}
+
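+/* Key-schedule linear transform L'(B) = B ^ (B <<< 13) ^ (B <<< 23). */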
+static inline u32 sm4_key_lin_sub(u32 x)
+{
+ return x ^ rol(x, 13) ^ rol(x, 23);
+}
+
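+/* Encryption linear transform
+ * L(B) = B ^ (B <<< 2) ^ (B <<< 10) ^ (B <<< 18) ^ (B <<< 24). */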
+static inline u32 sm4_enc_lin_sub(u32 x)
+{
+ u32 xrol2 = rol(x, 2);
+ return x ^ xrol2 ^ rol(xrol2, 8) ^ rol(xrol2, 16) ^ rol(x, 24);
+}
+
+static inline u32 sm4_key_sub(u32 x)
+{
+ return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static inline u32 sm4_enc_sub(u32 x)
+{
+ return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
+}
+
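+/* One SM4 round: X_{i+4} = X_i ^ T(X_{i+1} ^ X_{i+2} ^ X_{i+3} ^ rk_i),
+ * where T is the non-linear byte substitution followed by L. */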
+static inline u32
+sm4_round(const u32 x0, const u32 x1, const u32 x2, const u32 x3, const u32 rk)
+{
+ return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk);
+}
+
+static void
+sm4_expand_key (SM4_context *ctx, const byte *key)
+{
+ u32 rk[4];
+ int i;
+
+#ifdef USE_AESNI_AVX
+ if (ctx->use_aesni_avx)
+ {
+ _gcry_sm4_aesni_avx_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
+ fk, ck);
+ return;
+ }
+#endif
+
+ rk[0] = buf_get_be32(key + 4 * 0) ^ fk[0];
+ rk[1] = buf_get_be32(key + 4 * 1) ^ fk[1];
+ rk[2] = buf_get_be32(key + 4 * 2) ^ fk[2];
+ rk[3] = buf_get_be32(key + 4 * 3) ^ fk[3];
+
+ for (i = 0; i < 32; i += 4)
+ {
+ rk[0] = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
+ rk[1] = rk[1] ^ sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
+ rk[2] = rk[2] ^ sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
+ rk[3] = rk[3] ^ sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);
+ ctx->rkey_enc[i + 0] = rk[0];
+ ctx->rkey_enc[i + 1] = rk[1];
+ ctx->rkey_enc[i + 2] = rk[2];
+ ctx->rkey_enc[i + 3] = rk[3];
+ ctx->rkey_dec[31 - i - 0] = rk[0];
+ ctx->rkey_dec[31 - i - 1] = rk[1];
+ ctx->rkey_dec[31 - i - 2] = rk[2];
+ ctx->rkey_dec[31 - i - 3] = rk[3];
+ }
+
+ wipememory (rk, sizeof(rk));
+}
+
+static gcry_err_code_t
+sm4_setkey (void *context, const byte *key, const unsigned keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ SM4_context *ctx = context;
+ static int init = 0;
+ static const char *selftest_failed = NULL;
+ unsigned int hwf = _gcry_get_hw_features ();
+
+ (void)hwf;
+
+ if (!init)
+ {
+ init = 1;
+ selftest_failed = sm4_selftest();
+ if (selftest_failed)
+ log_error("%s\n", selftest_failed);
+ }
+ if (selftest_failed)
+ return GPG_ERR_SELFTEST_FAILED;
+
+ if (keylen != 16)
+ return GPG_ERR_INV_KEYLEN;
+
+#ifdef USE_AESNI_AVX
+ ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
+#endif
+#ifdef USE_AESNI_AVX2
+ ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2);
+#endif
+
+ /* Setup bulk encryption routines. */
+ memset (bulk_ops, 0, sizeof(*bulk_ops));
+ bulk_ops->cbc_dec = _gcry_sm4_cbc_dec;
+ bulk_ops->cfb_dec = _gcry_sm4_cfb_dec;
+ bulk_ops->ctr_enc = _gcry_sm4_ctr_enc;
+ bulk_ops->ocb_crypt = _gcry_sm4_ocb_crypt;
+ bulk_ops->ocb_auth = _gcry_sm4_ocb_auth;
+
+ sm4_expand_key (ctx, key);
+ return 0;
+}
+
+static unsigned int
+sm4_do_crypt (const u32 *rk, byte *out, const byte *in)
+{
+ u32 x[4];
+ int i;
+
+ x[0] = buf_get_be32(in + 0 * 4);
+ x[1] = buf_get_be32(in + 1 * 4);
+ x[2] = buf_get_be32(in + 2 * 4);
+ x[3] = buf_get_be32(in + 3 * 4);
+
+ for (i = 0; i < 32; i += 4)
+ {
+ x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]);
+ x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]);
+ x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]);
+ x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]);
+ }
+
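+  /* Reverse transform R: emit the last four state words in reverse order. */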
+ buf_put_be32(out + 0 * 4, x[3 - 0]);
+ buf_put_be32(out + 1 * 4, x[3 - 1]);
+ buf_put_be32(out + 2 * 4, x[3 - 2]);
+ buf_put_be32(out + 3 * 4, x[3 - 3]);
+
+ return /*burn_stack*/ 4*6+sizeof(void*)*4;
+}
+
+static unsigned int
+sm4_encrypt (void *context, byte *outbuf, const byte *inbuf)
+{
+ SM4_context *ctx = context;
+
+ prefetch_sbox_table ();
+
+ return sm4_do_crypt (ctx->rkey_enc, outbuf, inbuf);
+}
+
+static unsigned int
+sm4_decrypt (void *context, byte *outbuf, const byte *inbuf)
+{
+ SM4_context *ctx = context;
+
+ prefetch_sbox_table ();
+
+ return sm4_do_crypt (ctx->rkey_dec, outbuf, inbuf);
+}
+
+static unsigned int
+sm4_do_crypt_blks2 (const u32 *rk, byte *out, const byte *in)
+{
+ u32 x[4];
+ u32 y[4];
+ u32 k;
+ int i;
+
+  /* Encrypts/Decrypts two blocks for higher instruction-level
+   * parallelism. */
+
+ x[0] = buf_get_be32(in + 0 * 4);
+ x[1] = buf_get_be32(in + 1 * 4);
+ x[2] = buf_get_be32(in + 2 * 4);
+ x[3] = buf_get_be32(in + 3 * 4);
+ y[0] = buf_get_be32(in + 4 * 4);
+ y[1] = buf_get_be32(in + 5 * 4);
+ y[2] = buf_get_be32(in + 6 * 4);
+ y[3] = buf_get_be32(in + 7 * 4);
+
+ for (i = 0; i < 32; i += 4)
+ {
+ k = rk[i + 0];
+ x[0] = sm4_round(x[0], x[1], x[2], x[3], k);
+ y[0] = sm4_round(y[0], y[1], y[2], y[3], k);
+ k = rk[i + 1];
+ x[1] = sm4_round(x[1], x[2], x[3], x[0], k);
+ y[1] = sm4_round(y[1], y[2], y[3], y[0], k);
+ k = rk[i + 2];
+ x[2] = sm4_round(x[2], x[3], x[0], x[1], k);
+ y[2] = sm4_round(y[2], y[3], y[0], y[1], k);
+ k = rk[i + 3];
+ x[3] = sm4_round(x[3], x[0], x[1], x[2], k);
+ y[3] = sm4_round(y[3], y[0], y[1], y[2], k);
+ }
+
+ buf_put_be32(out + 0 * 4, x[3 - 0]);
+ buf_put_be32(out + 1 * 4, x[3 - 1]);
+ buf_put_be32(out + 2 * 4, x[3 - 2]);
+ buf_put_be32(out + 3 * 4, x[3 - 3]);
+ buf_put_be32(out + 4 * 4, y[3 - 0]);
+ buf_put_be32(out + 5 * 4, y[3 - 1]);
+ buf_put_be32(out + 6 * 4, y[3 - 2]);
+ buf_put_be32(out + 7 * 4, y[3 - 3]);
+
+ return /*burn_stack*/ 4*10+sizeof(void*)*4;
+}
+
+static unsigned int
+sm4_crypt_blocks (const u32 *rk, byte *out, const byte *in,
+ unsigned int num_blks)
+{
+ unsigned int burn_depth = 0;
+ unsigned int nburn;
+
+ while (num_blks >= 2)
+ {
+ nburn = sm4_do_crypt_blks2 (rk, out, in);
+ burn_depth = nburn > burn_depth ? nburn : burn_depth;
+ out += 2 * 16;
+ in += 2 * 16;
+ num_blks -= 2;
+ }
+
+ while (num_blks)
+ {
+ nburn = sm4_do_crypt (rk, out, in);
+ burn_depth = nburn > burn_depth ? nburn : burn_depth;
+ out += 16;
+ in += 16;
+ num_blks--;
+ }
+
+ if (burn_depth)
+ burn_depth += sizeof(void *) * 5;
+ return burn_depth;
+}
+
+/* Bulk encryption of complete blocks in CTR mode. This function is only
+ intended for the bulk encryption feature of cipher.c. CTR is expected to be
+ of size 16. */
+static void
+_gcry_sm4_ctr_enc(void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ SM4_context *ctx = context;
+ byte *outbuf = outbuf_arg;
+ const byte *inbuf = inbuf_arg;
+ int burn_stack_depth = 0;
+
+#ifdef USE_AESNI_AVX2
+ if (ctx->use_aesni_avx2)
+ {
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_sm4_aesni_avx2_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);
+
+ nblocks -= 16;
+ outbuf += 16 * 16;
+ inbuf += 16 * 16;
+ }
+ }
+#endif
+
+#ifdef USE_AESNI_AVX
+ if (ctx->use_aesni_avx)
+ {
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_sm4_aesni_avx_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);
+
+ nblocks -= 8;
+ outbuf += 8 * 16;
+ inbuf += 8 * 16;
+ }
+ }
+#endif
+
+ /* Process remaining blocks. */
+ if (nblocks)
+ {
+ unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
+ unsigned int num_blks);
+ byte tmpbuf[16 * 8];
+ unsigned int tmp_used = 16;
+
+ if (0)
+ ;
+#ifdef USE_AESNI_AVX
+ else if (ctx->use_aesni_avx)
+ {
+ crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
+ }
+#endif
+ else
+ {
+ prefetch_sbox_table ();
+ crypt_blk1_8 = sm4_crypt_blocks;
+ }
+
+ /* Process remaining blocks. */
+ while (nblocks)
+ {
+ size_t curr_blks = nblocks > 8 ? 8 : nblocks;
+ size_t i;
+
+ if (curr_blks * 16 > tmp_used)
+ tmp_used = curr_blks * 16;
+
+ cipher_block_cpy (tmpbuf + 0 * 16, ctr, 16);
+ for (i = 1; i < curr_blks; i++)
+ {
+ cipher_block_cpy (&tmpbuf[i * 16], ctr, 16);
+ cipher_block_add (&tmpbuf[i * 16], i, 16);
+ }
+ cipher_block_add (ctr, curr_blks, 16);
+
+ burn_stack_depth = crypt_blk1_8 (ctx->rkey_enc, tmpbuf, tmpbuf,
+ curr_blks);
+
+ for (i = 0; i < curr_blks; i++)
+ {
+ cipher_block_xor (outbuf, &tmpbuf[i * 16], inbuf, 16);
+ outbuf += 16;
+ inbuf += 16;
+ }
+
+ nblocks -= curr_blks;
+ }
+
+ wipememory(tmpbuf, tmp_used);
+ }
+
+ if (burn_stack_depth)
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CBC mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_sm4_cbc_dec(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ SM4_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ int burn_stack_depth = 0;
+
+#ifdef USE_AESNI_AVX2
+ if (ctx->use_aesni_avx2)
+ {
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_sm4_aesni_avx2_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);
+
+ nblocks -= 16;
+ outbuf += 16 * 16;
+ inbuf += 16 * 16;
+ }
+ }
+#endif
+
+#ifdef USE_AESNI_AVX
+ if (ctx->use_aesni_avx)
+ {
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_sm4_aesni_avx_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);
+
+ nblocks -= 8;
+ outbuf += 8 * 16;
+ inbuf += 8 * 16;
+ }
+ }
+#endif
+
+ /* Process remaining blocks. */
+ if (nblocks)
+ {
+ unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
+ unsigned int num_blks);
+ unsigned char savebuf[16 * 8];
+ unsigned int tmp_used = 16;
+
+ if (0)
+ ;
+#ifdef USE_AESNI_AVX
+ else if (ctx->use_aesni_avx)
+ {
+ crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
+ }
+#endif
+ else
+ {
+ prefetch_sbox_table ();
+ crypt_blk1_8 = sm4_crypt_blocks;
+ }
+
+ /* Process remaining blocks. */
+ while (nblocks)
+ {
+ size_t curr_blks = nblocks > 8 ? 8 : nblocks;
+ size_t i;
+
+ if (curr_blks * 16 > tmp_used)
+ tmp_used = curr_blks * 16;
+
+ burn_stack_depth = crypt_blk1_8 (ctx->rkey_dec, savebuf, inbuf,
+ curr_blks);
+
+ for (i = 0; i < curr_blks; i++)
+ {
+ cipher_block_xor_n_copy_2(outbuf, &savebuf[i * 16], iv, inbuf,
+ 16);
+ outbuf += 16;
+ inbuf += 16;
+ }
+
+ nblocks -= curr_blks;
+ }
+
+ wipememory(savebuf, tmp_used);
+ }
+
+ if (burn_stack_depth)
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CFB mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_sm4_cfb_dec(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ SM4_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ int burn_stack_depth = 0;
+
+#ifdef USE_AESNI_AVX2
+ if (ctx->use_aesni_avx2)
+ {
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_sm4_aesni_avx2_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);
+
+ nblocks -= 16;
+ outbuf += 16 * 16;
+ inbuf += 16 * 16;
+ }
+ }
+#endif
+
+#ifdef USE_AESNI_AVX
+ if (ctx->use_aesni_avx)
+ {
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_sm4_aesni_avx_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);
+
+ nblocks -= 8;
+ outbuf += 8 * 16;
+ inbuf += 8 * 16;
+ }
+ }
+#endif
+
+ /* Process remaining blocks. */
+ if (nblocks)
+ {
+ unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
+ unsigned int num_blks);
+ unsigned char ivbuf[16 * 8];
+ unsigned int tmp_used = 16;
+
+ if (0)
+ ;
+#ifdef USE_AESNI_AVX
+ else if (ctx->use_aesni_avx)
+ {
+ crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
+ }
+#endif
+ else
+ {
+ prefetch_sbox_table ();
+ crypt_blk1_8 = sm4_crypt_blocks;
+ }
+
+ /* Process remaining blocks. */
+ while (nblocks)
+ {
+ size_t curr_blks = nblocks > 8 ? 8 : nblocks;
+ size_t i;
+
+ if (curr_blks * 16 > tmp_used)
+ tmp_used = curr_blks * 16;
+
+ cipher_block_cpy (&ivbuf[0 * 16], iv, 16);
+ for (i = 1; i < curr_blks; i++)
+ cipher_block_cpy (&ivbuf[i * 16], &inbuf[(i - 1) * 16], 16);
+ cipher_block_cpy (iv, &inbuf[(i - 1) * 16], 16);
+
+ burn_stack_depth = crypt_blk1_8 (ctx->rkey_enc, ivbuf, ivbuf,
+ curr_blks);
+
+ for (i = 0; i < curr_blks; i++)
+ {
+ cipher_block_xor (outbuf, inbuf, &ivbuf[i * 16], 16);
+ outbuf += 16;
+ inbuf += 16;
+ }
+
+ nblocks -= curr_blks;
+ }
+
+ wipememory(ivbuf, tmp_used);
+ }
+
+ if (burn_stack_depth)
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk encryption/decryption of complete blocks in OCB mode. */
+static size_t
+_gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+ SM4_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ u64 blkn = c->u_mode.ocb.data_nblocks;
+ int burn_stack_depth = 0;
+
+#ifdef USE_AESNI_AVX2
+ if (ctx->use_aesni_avx2)
+ {
+ u64 Ls[16];
+ unsigned int n = 16 - (blkn % 16);
+ u64 *l;
+ int i;
+
+ if (nblocks >= 16)
+ {
+ for (i = 0; i < 16; i += 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ }
+
+ Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ l = &Ls[(15 + n) % 16];
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ blkn += 16;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+ if (encrypt)
+ _gcry_sm4_aesni_avx2_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
+ c->u_iv.iv, c->u_ctr.ctr, Ls);
+ else
+ _gcry_sm4_aesni_avx2_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
+ c->u_iv.iv, c->u_ctr.ctr, Ls);
+
+ nblocks -= 16;
+ outbuf += 16 * 16;
+ inbuf += 16 * 16;
+ }
+ }
+ }
+#endif
+
+#ifdef USE_AESNI_AVX
+ if (ctx->use_aesni_avx)
+ {
+ u64 Ls[8];
+ unsigned int n = 8 - (blkn % 8);
+ u64 *l;
+
+ if (nblocks >= 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ l = &Ls[(7 + n) % 8];
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ blkn += 8;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
+
+ if (encrypt)
+ _gcry_sm4_aesni_avx_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
+ c->u_iv.iv, c->u_ctr.ctr, Ls);
+ else
+ _gcry_sm4_aesni_avx_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
+ c->u_iv.iv, c->u_ctr.ctr, Ls);
+
+ nblocks -= 8;
+ outbuf += 8 * 16;
+ inbuf += 8 * 16;
+ }
+ }
+ }
+#endif
+
+ if (nblocks)
+ {
+ unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
+ unsigned int num_blks);
+ const u32 *rk = encrypt ? ctx->rkey_enc : ctx->rkey_dec;
+ unsigned char tmpbuf[16 * 8];
+ unsigned int tmp_used = 16;
+
+ if (0)
+ ;
+#ifdef USE_AESNI_AVX
+ else if (ctx->use_aesni_avx)
+ {
+ crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
+ }
+#endif
+ else
+ {
+ prefetch_sbox_table ();
+ crypt_blk1_8 = sm4_crypt_blocks;
+ }
+
+ while (nblocks)
+ {
+ size_t curr_blks = nblocks > 8 ? 8 : nblocks;
+ size_t i;
+
+ if (curr_blks * 16 > tmp_used)
+ tmp_used = curr_blks * 16;
+
+ for (i = 0; i < curr_blks; i++)
+ {
+ const unsigned char *l = ocb_get_l(c, ++blkn);
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ if (encrypt)
+ cipher_block_xor_1(c->u_ctr.ctr, &inbuf[i * 16], 16);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ cipher_block_xor_2dst (&tmpbuf[i * 16], c->u_iv.iv, l, 16);
+ cipher_block_xor (&outbuf[i * 16], &inbuf[i * 16],
+ c->u_iv.iv, 16);
+ }
+
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ crypt_blk1_8 (rk, outbuf, outbuf, curr_blks);
+
+ for (i = 0; i < curr_blks; i++)
+ {
+ cipher_block_xor_1 (&outbuf[i * 16], &tmpbuf[i * 16], 16);
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ if (!encrypt)
+ cipher_block_xor_1(c->u_ctr.ctr, &outbuf[i * 16], 16);
+ }
+
+ outbuf += curr_blks * 16;
+ inbuf += curr_blks * 16;
+ nblocks -= curr_blks;
+ }
+
+ wipememory(tmpbuf, tmp_used);
+ }
+
+ c->u_mode.ocb.data_nblocks = blkn;
+
+ if (burn_stack_depth)
+ _gcry_burn_stack(burn_stack_depth);
+
+ return 0;
+}
+
+/* Bulk authentication of complete blocks in OCB mode. */
+static size_t
+_gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
+{
+ SM4_context *ctx = (void *)&c->context.c;
+ const unsigned char *abuf = abuf_arg;
+ u64 blkn = c->u_mode.ocb.aad_nblocks;
+
+#ifdef USE_AESNI_AVX2
+ if (ctx->use_aesni_avx2)
+ {
+ u64 Ls[16];
+ unsigned int n = 16 - (blkn % 16);
+ u64 *l;
+ int i;
+
+ if (nblocks >= 16)
+ {
+ for (i = 0; i < 16; i += 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ }
+
+ Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ l = &Ls[(15 + n) % 16];
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ blkn += 16;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+ _gcry_sm4_aesni_avx2_ocb_auth(ctx->rkey_enc, abuf,
+ c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum, Ls);
+
+ nblocks -= 16;
+ abuf += 16 * 16;
+ }
+ }
+ }
+#endif
+
+#ifdef USE_AESNI_AVX
+ if (ctx->use_aesni_avx)
+ {
+ u64 Ls[8];
+ unsigned int n = 8 - (blkn % 8);
+ u64 *l;
+
+ if (nblocks >= 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ l = &Ls[(7 + n) % 8];
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ blkn += 8;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
+
+ _gcry_sm4_aesni_avx_ocb_auth(ctx->rkey_enc, abuf,
+ c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum, Ls);
+
+ nblocks -= 8;
+ abuf += 8 * 16;
+ }
+ }
+ }
+#endif
+
+ if (nblocks)
+ {
+ unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
+ unsigned int num_blks);
+ unsigned char tmpbuf[16 * 8];
+ unsigned int tmp_used = 16;
+
+ if (0)
+ ;
+#ifdef USE_AESNI_AVX
+ else if (ctx->use_aesni_avx)
+ {
+ crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
+ }
+#endif
+ else
+ {
+ prefetch_sbox_table ();
+ crypt_blk1_8 = sm4_crypt_blocks;
+ }
+
+ while (nblocks)
+ {
+ size_t curr_blks = nblocks > 8 ? 8 : nblocks;
+ size_t i;
+
+ if (curr_blks * 16 > tmp_used)
+ tmp_used = curr_blks * 16;
+
+ for (i = 0; i < curr_blks; i++)
+ {
+ const unsigned char *l = ocb_get_l(c, ++blkn);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ cipher_block_xor_2dst (&tmpbuf[i * 16],
+ c->u_mode.ocb.aad_offset, l, 16);
+ cipher_block_xor_1 (&tmpbuf[i * 16], &abuf[i * 16], 16);
+ }
+
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ crypt_blk1_8 (ctx->rkey_enc, tmpbuf, tmpbuf, curr_blks);
+
+ for (i = 0; i < curr_blks; i++)
+ {
+ cipher_block_xor_1 (c->u_mode.ocb.aad_sum, &tmpbuf[i * 16], 16);
+ }
+
+ abuf += curr_blks * 16;
+ nblocks -= curr_blks;
+ }
+
+ wipememory(tmpbuf, tmp_used);
+ }
+
+ c->u_mode.ocb.aad_nblocks = blkn;
+
+ return 0;
+}
+
+/* Run the self-tests for SM4-CTR; tests the IV increment of bulk CTR
+   encryption.  Returns NULL on success. */
+static const char*
+selftest_ctr_128 (void)
+{
+ const int nblocks = 16 - 1;
+ const int blocksize = 16;
+ const int context_size = sizeof(SM4_context);
+
+ return _gcry_selftest_helper_ctr("SM4", &sm4_setkey,
+ &sm4_encrypt, nblocks, blocksize, context_size);
+}
+
+/* Run the self-tests for SM4-CBC; tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cbc_128 (void)
+{
+ const int nblocks = 16 - 1;
+ const int blocksize = 16;
+ const int context_size = sizeof(SM4_context);
+
+ return _gcry_selftest_helper_cbc("SM4", &sm4_setkey,
+ &sm4_encrypt, nblocks, blocksize, context_size);
+}
+
+/* Run the self-tests for SM4-CFB; tests bulk CFB decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cfb_128 (void)
+{
+ const int nblocks = 16 - 1;
+ const int blocksize = 16;
+ const int context_size = sizeof(SM4_context);
+
+ return _gcry_selftest_helper_cfb("SM4", &sm4_setkey,
+ &sm4_encrypt, nblocks, blocksize, context_size);
+}
+
+static const char *
+sm4_selftest (void)
+{
+ SM4_context ctx;
+ byte scratch[16];
+ const char *r;
+
+ static const byte plaintext[16] = {
+ 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
+ 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10,
+ };
+ static const byte key[16] = {
+ 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
+ 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10,
+ };
+ static const byte ciphertext[16] = {
+ 0x68, 0x1E, 0xDF, 0x34, 0xD2, 0x06, 0x96, 0x5E,
+ 0x86, 0xB3, 0xE9, 0x4F, 0x53, 0x6E, 0x42, 0x46
+ };
+
+ memset (&ctx, 0, sizeof(ctx));
+
+ sm4_expand_key (&ctx, key);
+ sm4_encrypt (&ctx, scratch, plaintext);
+ if (memcmp (scratch, ciphertext, sizeof (ciphertext)))
+ return "SM4 test encryption failed.";
+ sm4_decrypt (&ctx, scratch, scratch);
+ if (memcmp (scratch, plaintext, sizeof (plaintext)))
+ return "SM4 test decryption failed.";
+
+ if ( (r = selftest_ctr_128 ()) )
+ return r;
+
+ if ( (r = selftest_cbc_128 ()) )
+ return r;
+
+ if ( (r = selftest_cfb_128 ()) )
+ return r;
+
+ return NULL;
+}
+
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+ const char *what;
+ const char *errtxt;
+
+ (void)extended;
+
+ if (algo != GCRY_CIPHER_SM4)
+ return GPG_ERR_CIPHER_ALGO;
+
+ what = "selftest";
+ errtxt = sm4_selftest ();
+ if (errtxt)
+ goto failed;
+
+ return 0;
+
+ failed:
+ if (report)
+ report ("cipher", GCRY_CIPHER_SM4, what, errtxt);
+ return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+static gcry_cipher_oid_spec_t sm4_oids[] =
+ {
+ { "1.2.156.10197.1.104.1", GCRY_CIPHER_MODE_ECB },
+ { "1.2.156.10197.1.104.2", GCRY_CIPHER_MODE_CBC },
+ { "1.2.156.10197.1.104.3", GCRY_CIPHER_MODE_OFB },
+ { "1.2.156.10197.1.104.4", GCRY_CIPHER_MODE_CFB },
+ { "1.2.156.10197.1.104.7", GCRY_CIPHER_MODE_CTR },
+ { NULL }
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_sm4 =
+ {
+ GCRY_CIPHER_SM4, {0, 0},
+ "SM4", NULL, sm4_oids, 16, 128,
+ sizeof (SM4_context),
+ sm4_setkey, sm4_encrypt, sm4_decrypt,
+ NULL, NULL,
+ run_selftests
+ };
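+
+/* A minimal usage sketch of this cipher through the public libgcrypt API,
+ * assuming SM4 support (GCRY_CIPHER_SM4) is enabled in the build.  This is
+ * only an illustration, not part of this file's internal interface; key, in,
+ * out and len stand for the caller's own buffers and length:
+ *
+ *   gcry_cipher_hd_t hd;
+ *   unsigned char ctr[16] = { 0 };
+ *   gcry_cipher_open (&hd, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_CTR, 0);
+ *   gcry_cipher_setkey (hd, key, 16);           // SM4 uses a 128-bit key
+ *   gcry_cipher_setctr (hd, ctr, 16);
+ *   gcry_cipher_encrypt (hd, out, len, in, len);
+ *   gcry_cipher_close (hd);
+ */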
diff --git a/comm/third_party/libgcrypt/cipher/stribog.c b/comm/third_party/libgcrypt/cipher/stribog.c
new file mode 100644
index 0000000000..f8776a3e8f
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/stribog.c
@@ -0,0 +1,1362 @@
+/* stribog.c - GOST R 34.11-2012 (Stribog) hash function
+ * Copyright (C) 2013 Dmitry Eremin-Solenikov
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+
+typedef struct
+{
+ gcry_md_block_ctx_t bctx;
+ union
+ {
+ u64 h[8];
+ unsigned char result[64];
+ };
+ u64 N[8];
+ u64 Sigma[8];
+} STRIBOG_CONTEXT;
+
+
+/* Pre-computed results of multiplying bytes by the matrix A, reordered with
+   the Pi[] substitution. */
+static const u64 stribog_table[8][256] =
+{
+ /* 0 */
+ { U64_C(0xd01f715b5c7ef8e6), U64_C(0x16fa240980778325),
+ U64_C(0xa8a42e857ee049c8), U64_C(0x6ac1068fa186465b),
+ U64_C(0x6e417bd7a2e9320b), U64_C(0x665c8167a437daab),
+ U64_C(0x7666681aa89617f6), U64_C(0x4b959163700bdcf5),
+ U64_C(0xf14be6b78df36248), U64_C(0xc585bd689a625cff),
+ U64_C(0x9557d7fca67d82cb), U64_C(0x89f0b969af6dd366),
+ U64_C(0xb0833d48749f6c35), U64_C(0xa1998c23b1ecbc7c),
+ U64_C(0x8d70c431ac02a736), U64_C(0xd6dfbc2fd0a8b69e),
+ U64_C(0x37aeb3e551fa198b), U64_C(0x0b7d128a40b5cf9c),
+ U64_C(0x5a8f2008b5780cbc), U64_C(0xedec882284e333e5),
+ U64_C(0xd25fc177d3c7c2ce), U64_C(0x5e0f5d50b61778ec),
+ U64_C(0x1d873683c0c24cb9), U64_C(0xad040bcbb45d208c),
+ U64_C(0x2f89a0285b853c76), U64_C(0x5732fff6791b8d58),
+ U64_C(0x3e9311439ef6ec3f), U64_C(0xc9183a809fd3c00f),
+ U64_C(0x83adf3f5260a01ee), U64_C(0xa6791941f4e8ef10),
+ U64_C(0x103ae97d0ca1cd5d), U64_C(0x2ce948121dee1b4a),
+ U64_C(0x39738421dbf2bf53), U64_C(0x093da2a6cf0cf5b4),
+ U64_C(0xcd9847d89cbcb45f), U64_C(0xf9561c078b2d8ae8),
+ U64_C(0x9c6a755a6971777f), U64_C(0xbc1ebaa0712ef0c5),
+ U64_C(0x72e61542abf963a6), U64_C(0x78bb5fde229eb12e),
+ U64_C(0x14ba94250fceb90d), U64_C(0x844d6697630e5282),
+ U64_C(0x98ea08026a1e032f), U64_C(0xf06bbea144217f5c),
+ U64_C(0xdb6263d11ccb377a), U64_C(0x641c314b2b8ee083),
+ U64_C(0x320e96ab9b4770cf), U64_C(0x1ee7deb986a96b85),
+ U64_C(0xe96cf57a878c47b5), U64_C(0xfdd6615f8842feb8),
+ U64_C(0xc83862965601dd1b), U64_C(0x2ea9f83e92572162),
+ U64_C(0xf876441142ff97fc), U64_C(0xeb2c455608357d9d),
+ U64_C(0x5612a7e0b0c9904c), U64_C(0x6c01cbfb2d500823),
+ U64_C(0x4548a6a7fa037a2d), U64_C(0xabc4c6bf388b6ef4),
+ U64_C(0xbade77d4fdf8bebd), U64_C(0x799b07c8eb4cac3a),
+ U64_C(0x0c9d87e805b19cf0), U64_C(0xcb588aac106afa27),
+ U64_C(0xea0c1d40c1e76089), U64_C(0x2869354a1e816f1a),
+ U64_C(0xff96d17307fbc490), U64_C(0x9f0a9d602f1a5043),
+ U64_C(0x96373fc6e016a5f7), U64_C(0x5292dab8b3a6e41c),
+ U64_C(0x9b8ae0382c752413), U64_C(0x4f15ec3b7364a8a5),
+ U64_C(0x3fb349555724f12b), U64_C(0xc7c50d4415db66d7),
+ U64_C(0x92b7429ee379d1a7), U64_C(0xd37f99611a15dfda),
+ U64_C(0x231427c05e34a086), U64_C(0xa439a96d7b51d538),
+ U64_C(0xb403401077f01865), U64_C(0xdda2aea5901d7902),
+ U64_C(0x0a5d4a9c8967d288), U64_C(0xc265280adf660f93),
+ U64_C(0x8bb0094520d4e94e), U64_C(0x2a29856691385532),
+ U64_C(0x42a833c5bf072941), U64_C(0x73c64d54622b7eb2),
+ U64_C(0x07e095624504536c), U64_C(0x8a905153e906f45a),
+ U64_C(0x6f6123c16b3b2f1f), U64_C(0xc6e55552dc097bc3),
+ U64_C(0x4468feb133d16739), U64_C(0xe211e7f0c7398829),
+ U64_C(0xa2f96419f7879b40), U64_C(0x19074bdbc3ad38e9),
+ U64_C(0xf4ebc3f9474e0b0c), U64_C(0x43886bd376d53455),
+ U64_C(0xd8028beb5aa01046), U64_C(0x51f23282f5cdc320),
+ U64_C(0xe7b1c2be0d84e16d), U64_C(0x081dfab006dee8a0),
+ U64_C(0x3b33340d544b857b), U64_C(0x7f5bcabc679ae242),
+ U64_C(0x0edd37c48a08a6d8), U64_C(0x81ed43d9a9b33bc6),
+ U64_C(0xb1a3655ebd4d7121), U64_C(0x69a1eeb5e7ed6167),
+ U64_C(0xf6ab73d5c8f73124), U64_C(0x1a67a3e185c61fd5),
+ U64_C(0x2dc91004d43c065e), U64_C(0x0240b02c8fb93a28),
+ U64_C(0x90f7f2b26cc0eb8f), U64_C(0x3cd3a16f114fd617),
+ U64_C(0xaae49ea9f15973e0), U64_C(0x06c0cd748cd64e78),
+ U64_C(0xda423bc7d5192a6e), U64_C(0xc345701c16b41287),
+ U64_C(0x6d2193ede4821537), U64_C(0xfcf639494190e3ac),
+ U64_C(0x7c3b228621f1c57e), U64_C(0xfb16ac2b0494b0c0),
+ U64_C(0xbf7e529a3745d7f9), U64_C(0x6881b6a32e3f7c73),
+ U64_C(0xca78d2bad9b8e733), U64_C(0xbbfe2fc2342aa3a9),
+ U64_C(0x0dbddffecc6381e4), U64_C(0x70a6a56e2440598e),
+ U64_C(0xe4d12a844befc651), U64_C(0x8c509c2765d0ba22),
+ U64_C(0xee8c6018c28814d9), U64_C(0x17da7c1f49a59e31),
+ U64_C(0x609c4c1328e194d3), U64_C(0xb3e3d57232f44b09),
+ U64_C(0x91d7aaa4a512f69b), U64_C(0x0ffd6fd243dabbcc),
+ U64_C(0x50d26a943c1fde34), U64_C(0x6be15e9968545b4f),
+ U64_C(0x94778fea6faf9fdf), U64_C(0x2b09dd7058ea4826),
+ U64_C(0x677cd9716de5c7bf), U64_C(0x49d5214fffb2e6dd),
+ U64_C(0x0360e83a466b273c), U64_C(0x1fc786af4f7b7691),
+ U64_C(0xa0b9d435783ea168), U64_C(0xd49f0c035f118cb6),
+ U64_C(0x01205816c9d21d14), U64_C(0xac2453dd7d8f3d98),
+ U64_C(0x545217cc3f70aa64), U64_C(0x26b4028e9489c9c2),
+ U64_C(0xdec2469fd6765e3e), U64_C(0x04807d58036f7450),
+ U64_C(0xe5f17292823ddb45), U64_C(0xf30b569b024a5860),
+ U64_C(0x62dcfc3fa758aefb), U64_C(0xe84cad6c4e5e5aa1),
+ U64_C(0xccb81fce556ea94b), U64_C(0x53b282ae7a74f908),
+ U64_C(0x1b47fbf74c1402c1), U64_C(0x368eebf39828049f),
+ U64_C(0x7afbeff2ad278b06), U64_C(0xbe5e0a8cfe97caed),
+ U64_C(0xcfd8f7f413058e77), U64_C(0xf78b2bc301252c30),
+ U64_C(0x4d555c17fcdd928d), U64_C(0x5f2f05467fc565f8),
+ U64_C(0x24f4b2a21b30f3ea), U64_C(0x860dd6bbecb768aa),
+ U64_C(0x4c750401350f8f99), U64_C(0x0000000000000000),
+ U64_C(0xecccd0344d312ef1), U64_C(0xb5231806be220571),
+ U64_C(0xc105c030990d28af), U64_C(0x653c695de25cfd97),
+ U64_C(0x159acc33c61ca419), U64_C(0xb89ec7f872418495),
+ U64_C(0xa9847693b73254dc), U64_C(0x58cf90243ac13694),
+ U64_C(0x59efc832f3132b80), U64_C(0x5c4fed7c39ae42c4),
+ U64_C(0x828dabe3efd81cfa), U64_C(0xd13f294d95ace5f2),
+ U64_C(0x7d1b7a90e823d86a), U64_C(0xb643f03cf849224d),
+ U64_C(0x3df3f979d89dcb03), U64_C(0x7426d836272f2dde),
+ U64_C(0xdfe21e891fa4432a), U64_C(0x3a136c1b9d99986f),
+ U64_C(0xfa36f43dcd46add4), U64_C(0xc025982650df35bb),
+ U64_C(0x856d3e81aadc4f96), U64_C(0xc4a5e57e53b041eb),
+ U64_C(0x4708168b75ba4005), U64_C(0xaf44bbe73be41aa4),
+ U64_C(0x971767d029c4b8e3), U64_C(0xb9be9feebb939981),
+ U64_C(0x215497ecd18d9aae), U64_C(0x316e7e91dd2c57f3),
+ U64_C(0xcef8afe2dad79363), U64_C(0x3853dc371220a247),
+ U64_C(0x35ee03c9de4323a3), U64_C(0xe6919aa8c456fc79),
+ U64_C(0xe05157dc4880b201), U64_C(0x7bdbb7e464f59612),
+ U64_C(0x127a59518318f775), U64_C(0x332ecebd52956ddb),
+ U64_C(0x8f30741d23bb9d1e), U64_C(0xd922d3fd93720d52),
+ U64_C(0x7746300c61440ae2), U64_C(0x25d4eab4d2e2eefe),
+ U64_C(0x75068020eefd30ca), U64_C(0x135a01474acaea61),
+ U64_C(0x304e268714fe4ae7), U64_C(0xa519f17bb283c82c),
+ U64_C(0xdc82f6b359cf6416), U64_C(0x5baf781e7caa11a8),
+ U64_C(0xb2c38d64fb26561d), U64_C(0x34ce5bdf17913eb7),
+ U64_C(0x5d6fb56af07c5fd0), U64_C(0x182713cd0a7f25fd),
+ U64_C(0x9e2ac576e6c84d57), U64_C(0x9aaab82ee5a73907),
+ U64_C(0xa3d93c0f3e558654), U64_C(0x7e7b92aaae48ff56),
+ U64_C(0x872d8ead256575be), U64_C(0x41c8dbfff96c0e7d),
+ U64_C(0x99ca5014a3cc1e3b), U64_C(0x40e883e930be1369),
+ U64_C(0x1ca76e95091051ad), U64_C(0x4e35b42dbab6b5b1),
+ U64_C(0x05a0254ecabd6944), U64_C(0xe1710fca8152af15),
+ U64_C(0xf22b0e8dcb984574), U64_C(0xb763a82a319b3f59),
+ U64_C(0x63fca4296e8ab3ef), U64_C(0x9d4a2d4ca0a36a6b),
+ U64_C(0xe331bfe60eeb953d), U64_C(0xd5bf541596c391a2),
+ U64_C(0xf5cb9bef8e9c1618), U64_C(0x46284e9dbc685d11),
+ U64_C(0x2074cffa185f87ba), U64_C(0xbd3ee2b6b8fcedd1),
+ U64_C(0xae64e3f1f23607b0), U64_C(0xfeb68965ce29d984),
+ U64_C(0x55724fdaf6a2b770), U64_C(0x29496d5cd753720e),
+ U64_C(0xa75941573d3af204), U64_C(0x8e102c0bea69800a),
+ U64_C(0x111ab16bc573d049), U64_C(0xd7ffe439197aab8a),
+ U64_C(0xefac380e0b5a09cd), U64_C(0x48f579593660fbc9),
+ U64_C(0x22347fd697e6bd92), U64_C(0x61bc1405e13389c7),
+ U64_C(0x4ab5c975b9d9c1e1), U64_C(0x80cd1bcf606126d2),
+ U64_C(0x7186fd78ed92449a), U64_C(0x93971a882aabccb3),
+ U64_C(0x88d0e17f66bfce72), U64_C(0x27945a985d5bd4d6) },
+ /* 1 */
+ { U64_C(0xde553f8c05a811c8), U64_C(0x1906b59631b4f565),
+ U64_C(0x436e70d6b1964ff7), U64_C(0x36d343cb8b1e9d85),
+ U64_C(0x843dfacc858aab5a), U64_C(0xfdfc95c299bfc7f9),
+ U64_C(0x0f634bdea1d51fa2), U64_C(0x6d458b3b76efb3cd),
+ U64_C(0x85c3f77cf8593f80), U64_C(0x3c91315fbe737cb2),
+ U64_C(0x2148b03366ace398), U64_C(0x18f8b8264c6761bf),
+ U64_C(0xc830c1c495c9fb0f), U64_C(0x981a76102086a0aa),
+ U64_C(0xaa16012142f35760), U64_C(0x35cc54060c763cf6),
+ U64_C(0x42907d66cc45db2d), U64_C(0x8203d44b965af4bc),
+ U64_C(0x3d6f3cefc3a0e868), U64_C(0xbc73ff69d292bda7),
+ U64_C(0x8722ed0102e20a29), U64_C(0x8f8185e8cd34deb7),
+ U64_C(0x9b0561dda7ee01d9), U64_C(0x5335a0193227fad6),
+ U64_C(0xc9cecc74e81a6fd5), U64_C(0x54f5832e5c2431ea),
+ U64_C(0x99e47ba05d553470), U64_C(0xf7bee756acd226ce),
+ U64_C(0x384e05a5571816fd), U64_C(0xd1367452a47d0e6a),
+ U64_C(0xf29fde1c386ad85b), U64_C(0x320c77316275f7ca),
+ U64_C(0xd0c879e2d9ae9ab0), U64_C(0xdb7406c69110ef5d),
+ U64_C(0x45505e51a2461011), U64_C(0xfc029872e46c5323),
+ U64_C(0xfa3cb6f5f7bc0cc5), U64_C(0x031f17cd8768a173),
+ U64_C(0xbd8df2d9af41297d), U64_C(0x9d3b4f5ab43e5e3f),
+ U64_C(0x4071671b36feee84), U64_C(0x716207e7d3e3b83d),
+ U64_C(0x48d20ff2f9283a1a), U64_C(0x27769eb4757cbc7e),
+ U64_C(0x5c56ebc793f2e574), U64_C(0xa48b474f9ef5dc18),
+ U64_C(0x52cbada94ff46e0c), U64_C(0x60c7da982d8199c6),
+ U64_C(0x0e9d466edc068b78), U64_C(0x4eec2175eaf865fc),
+ U64_C(0x550b8e9e21f7a530), U64_C(0x6b7ba5bc653fec2b),
+ U64_C(0x5eb7f1ba6949d0dd), U64_C(0x57ea94e3db4c9099),
+ U64_C(0xf640eae6d101b214), U64_C(0xdd4a284182c0b0bb),
+ U64_C(0xff1d8fbf6304f250), U64_C(0xb8accb933bf9d7e8),
+ U64_C(0xe8867c478eb68c4d), U64_C(0x3f8e2692391bddc1),
+ U64_C(0xcb2fd60912a15a7c), U64_C(0xaec935dbab983d2f),
+ U64_C(0xf55ffd2b56691367), U64_C(0x80e2ce366ce1c115),
+ U64_C(0x179bf3f8edb27e1d), U64_C(0x01fe0db07dd394da),
+ U64_C(0xda8a0b76ecc37b87), U64_C(0x44ae53e1df9584cb),
+ U64_C(0xb310b4b77347a205), U64_C(0xdfab323c787b8512),
+ U64_C(0x3b511268d070b78e), U64_C(0x65e6e3d2b9396753),
+ U64_C(0x6864b271e2574d58), U64_C(0x259784c98fc789d7),
+ U64_C(0x02e11a7dfabb35a9), U64_C(0x8841a6dfa337158b),
+ U64_C(0x7ade78c39b5dcdd0), U64_C(0xb7cf804d9a2cc84a),
+ U64_C(0x20b6bd831b7f7742), U64_C(0x75bd331d3a88d272),
+ U64_C(0x418f6aab4b2d7a5e), U64_C(0xd9951cbb6babdaf4),
+ U64_C(0xb6318dfde7ff5c90), U64_C(0x1f389b112264aa83),
+ U64_C(0x492c024284fbaec0), U64_C(0xe33a0363c608f9a0),
+ U64_C(0x2688930408af28a4), U64_C(0xc7538a1a341ce4ad),
+ U64_C(0x5da8e677ee2171ae), U64_C(0x8c9e92254a5c7fc4),
+ U64_C(0x63d8cd55aae938b5), U64_C(0x29ebd8daa97a3706),
+ U64_C(0x959827b37be88aa1), U64_C(0x1484e4356adadf6e),
+ U64_C(0xa7945082199d7d6b), U64_C(0xbf6ce8a455fa1cd4),
+ U64_C(0x9cc542eac9edcae5), U64_C(0x79c16f0e1c356ca3),
+ U64_C(0x89bfab6fdee48151), U64_C(0xd4174d1830c5f0ff),
+ U64_C(0x9258048415eb419d), U64_C(0x6139d72850520d1c),
+ U64_C(0x6a85a80c18ec78f1), U64_C(0xcd11f88e0171059a),
+ U64_C(0xcceff53e7ca29140), U64_C(0xd229639f2315af19),
+ U64_C(0x90b91ef9ef507434), U64_C(0x5977d28d074a1be1),
+ U64_C(0x311360fce51d56b9), U64_C(0xc093a92d5a1f2f91),
+ U64_C(0x1a19a25bb6dc5416), U64_C(0xeb996b8a09de2d3e),
+ U64_C(0xfee3820f1ed7668a), U64_C(0xd7085ad5b7ad518c),
+ U64_C(0x7fff41890fe53345), U64_C(0xec5948bd67dde602),
+ U64_C(0x2fd5f65dbaaa68e0), U64_C(0xa5754affe32648c2),
+ U64_C(0xf8ddac880d07396c), U64_C(0x6fa491468c548664),
+ U64_C(0x0c7c5c1326bdbed1), U64_C(0x4a33158f03930fb3),
+ U64_C(0x699abfc19f84d982), U64_C(0xe4fa2054a80b329c),
+ U64_C(0x6707f9af438252fa), U64_C(0x08a368e9cfd6d49e),
+ U64_C(0x47b1442c58fd25b8), U64_C(0xbbb3dc5ebc91769b),
+ U64_C(0x1665fe489061eac7), U64_C(0x33f27a811fa66310),
+ U64_C(0x93a609346838d547), U64_C(0x30ed6d4c98cec263),
+ U64_C(0x1dd9816cd8df9f2a), U64_C(0x94662a03063b1e7b),
+ U64_C(0x83fdd9fbeb896066), U64_C(0x7b207573e68e590a),
+ U64_C(0x5f49fc0a149a4407), U64_C(0x343259b671a5a82c),
+ U64_C(0xfbc2bb458a6f981f), U64_C(0xc272b350a0a41a38),
+ U64_C(0x3aaf1fd8ada32354), U64_C(0x6cbb868b0b3c2717),
+ U64_C(0xa2b569c88d2583fe), U64_C(0xf180c9d1bf027928),
+ U64_C(0xaf37386bd64ba9f5), U64_C(0x12bacab2790a8088),
+ U64_C(0x4c0d3b0810435055), U64_C(0xb2eeb9070e9436df),
+ U64_C(0xc5b29067cea7d104), U64_C(0xdcb425f1ff132461),
+ U64_C(0x4f122cc5972bf126), U64_C(0xac282fa651230886),
+ U64_C(0xe7e537992f6393ef), U64_C(0xe61b3a2952b00735),
+ U64_C(0x709c0a57ae302ce7), U64_C(0xe02514ae416058d3),
+ U64_C(0xc44c9dd7b37445de), U64_C(0x5a68c5408022ba92),
+ U64_C(0x1c278cdca50c0bf0), U64_C(0x6e5a9cf6f18712be),
+ U64_C(0x86dce0b17f319ef3), U64_C(0x2d34ec2040115d49),
+ U64_C(0x4bcd183f7e409b69), U64_C(0x2815d56ad4a9a3dc),
+ U64_C(0x24698979f2141d0d), U64_C(0x0000000000000000),
+ U64_C(0x1ec696a15fb73e59), U64_C(0xd86b110b16784e2e),
+ U64_C(0x8e7f8858b0e74a6d), U64_C(0x063e2e8713d05fe6),
+ U64_C(0xe2c40ed3bbdb6d7a), U64_C(0xb1f1aeca89fc97ac),
+ U64_C(0xe1db191e3cb3cc09), U64_C(0x6418ee62c4eaf389),
+ U64_C(0xc6ad87aa49cf7077), U64_C(0xd6f65765ca7ec556),
+ U64_C(0x9afb6c6dda3d9503), U64_C(0x7ce05644888d9236),
+ U64_C(0x8d609f95378feb1e), U64_C(0x23a9aa4e9c17d631),
+ U64_C(0x6226c0e5d73aac6f), U64_C(0x56149953a69f0443),
+ U64_C(0xeeb852c09d66d3ab), U64_C(0x2b0ac2a753c102af),
+ U64_C(0x07c023376e03cb3c), U64_C(0x2ccae1903dc2c993),
+ U64_C(0xd3d76e2f5ec63bc3), U64_C(0x9e2458973356ff4c),
+ U64_C(0xa66a5d32644ee9b1), U64_C(0x0a427294356de137),
+ U64_C(0x783f62be61e6f879), U64_C(0x1344c70204d91452),
+ U64_C(0x5b96c8f0fdf12e48), U64_C(0xa90916ecc59bf613),
+ U64_C(0xbe92e5142829880e), U64_C(0x727d102a548b194e),
+ U64_C(0x1be7afebcb0fc0cc), U64_C(0x3e702b2244c8491b),
+ U64_C(0xd5e940a84d166425), U64_C(0x66f9f41f3e51c620),
+ U64_C(0xabe80c913f20c3ba), U64_C(0xf07ec461c2d1edf2),
+ U64_C(0xf361d3ac45b94c81), U64_C(0x0521394a94b8fe95),
+ U64_C(0xadd622162cf09c5c), U64_C(0xe97871f7f3651897),
+ U64_C(0xf4a1f09b2bba87bd), U64_C(0x095d6559b2054044),
+ U64_C(0x0bbc7f2448be75ed), U64_C(0x2af4cf172e129675),
+ U64_C(0x157ae98517094bb4), U64_C(0x9fda55274e856b96),
+ U64_C(0x914713499283e0ee), U64_C(0xb952c623462a4332),
+ U64_C(0x74433ead475b46a8), U64_C(0x8b5eb112245fb4f8),
+ U64_C(0xa34b6478f0f61724), U64_C(0x11a5dd7ffe6221fb),
+ U64_C(0xc16da49d27ccbb4b), U64_C(0x76a224d0bde07301),
+ U64_C(0x8aa0bca2598c2022), U64_C(0x4df336b86d90c48f),
+ U64_C(0xea67663a740db9e4), U64_C(0xef465f70e0b54771),
+ U64_C(0x39b008152acb8227), U64_C(0x7d1e5bf4f55e06ec),
+ U64_C(0x105bd0cf83b1b521), U64_C(0x775c2960c033e7db),
+ U64_C(0x7e014c397236a79f), U64_C(0x811cc386113255cf),
+ U64_C(0xeda7450d1a0e72d8), U64_C(0x5889df3d7a998f3b),
+ U64_C(0x2e2bfbedc779fc3a), U64_C(0xce0eef438619a4e9),
+ U64_C(0x372d4e7bf6cd095f), U64_C(0x04df34fae96b6a4f),
+ U64_C(0xf923a13870d4adb6), U64_C(0xa1aa7e050a4d228d),
+ U64_C(0xa8f71b5cb84862c9), U64_C(0xb52e9a306097fde3),
+ U64_C(0x0d8251a35b6e2a0b), U64_C(0x2257a7fee1c442eb),
+ U64_C(0x73831d9a29588d94), U64_C(0x51d4ba64c89ccf7f),
+ U64_C(0x502ab7d4b54f5ba5), U64_C(0x97793dce8153bf08),
+ U64_C(0xe5042de4d5d8a646), U64_C(0x9687307efc802bd2),
+ U64_C(0xa05473b5779eb657), U64_C(0xb4d097801d446939),
+ U64_C(0xcff0e2f3fbca3033), U64_C(0xc38cbee0dd778ee2),
+ U64_C(0x464f499c252eb162), U64_C(0xcad1dbb96f72cea6),
+ U64_C(0xba4dd1eec142e241), U64_C(0xb00fa37af42f0376) },
+ /* 2 */
+ { U64_C(0xcce4cd3aa968b245), U64_C(0x089d5484e80b7faf),
+ U64_C(0x638246c1b3548304), U64_C(0xd2fe0ec8c2355492),
+ U64_C(0xa7fbdf7ff2374eee), U64_C(0x4df1600c92337a16),
+ U64_C(0x84e503ea523b12fb), U64_C(0x0790bbfd53ab0c4a),
+ U64_C(0x198a780f38f6ea9d), U64_C(0x2ab30c8f55ec48cb),
+ U64_C(0xe0f7fed6b2c49db5), U64_C(0xb6ecf3f422cadbdc),
+ U64_C(0x409c9a541358df11), U64_C(0xd3ce8a56dfde3fe3),
+ U64_C(0xc3e9224312c8c1a0), U64_C(0x0d6dfa58816ba507),
+ U64_C(0xddf3e1b179952777), U64_C(0x04c02a42748bb1d9),
+ U64_C(0x94c2abff9f2decb8), U64_C(0x4f91752da8f8acf4),
+ U64_C(0x78682befb169bf7b), U64_C(0xe1c77a48af2ff6c4),
+ U64_C(0x0c5d7ec69c80ce76), U64_C(0x4cc1e4928fd81167),
+ U64_C(0xfeed3d24d9997b62), U64_C(0x518bb6dfc3a54a23),
+ U64_C(0x6dbf2d26151f9b90), U64_C(0xb5bc624b05ea664f),
+ U64_C(0xe86aaa525acfe21a), U64_C(0x4801ced0fb53a0be),
+ U64_C(0xc91463e6c00868ed), U64_C(0x1027a815cd16fe43),
+ U64_C(0xf67069a0319204cd), U64_C(0xb04ccc976c8abce7),
+ U64_C(0xc0b9b3fc35e87c33), U64_C(0xf380c77c58f2de65),
+ U64_C(0x50bb3241de4e2152), U64_C(0xdf93f490435ef195),
+ U64_C(0xf1e0d25d62390887), U64_C(0xaf668bfb1a3c3141),
+ U64_C(0xbc11b251f00a7291), U64_C(0x73a5eed47e427d47),
+ U64_C(0x25bee3f6ee4c3b2e), U64_C(0x43cc0beb34786282),
+ U64_C(0xc824e778dde3039c), U64_C(0xf97d86d98a327728),
+ U64_C(0xf2b043e24519b514), U64_C(0xe297ebf7880f4b57),
+ U64_C(0x3a94a49a98fab688), U64_C(0x868516cb68f0c419),
+ U64_C(0xeffa11af0964ee50), U64_C(0xa4ab4ec0d517f37d),
+ U64_C(0xa9c6b498547c567a), U64_C(0x8e18424f80fbbbb6),
+ U64_C(0x0bcdc53bcf2bc23c), U64_C(0x137739aaea3643d0),
+ U64_C(0x2c1333ec1bac2ff0), U64_C(0x8d48d3f0a7db0625),
+ U64_C(0x1e1ac3f26b5de6d7), U64_C(0xf520f81f16b2b95e),
+ U64_C(0x9f0f6ec450062e84), U64_C(0x0130849e1deb6b71),
+ U64_C(0xd45e31ab8c7533a9), U64_C(0x652279a2fd14e43f),
+ U64_C(0x3209f01e70f1c927), U64_C(0xbe71a770cac1a473),
+ U64_C(0x0e3d6be7a64b1894), U64_C(0x7ec8148cff29d840),
+ U64_C(0xcb7476c7fac3be0f), U64_C(0x72956a4a63a91636),
+ U64_C(0x37f95ec21991138f), U64_C(0x9e3fea5a4ded45f5),
+ U64_C(0x7b38ba50964902e8), U64_C(0x222e580bbde73764),
+ U64_C(0x61e253e0899f55e6), U64_C(0xfc8d2805e352ad80),
+ U64_C(0x35994be3235ac56d), U64_C(0x09add01af5e014de),
+ U64_C(0x5e8659a6780539c6), U64_C(0xb17c48097161d796),
+ U64_C(0x026015213acbd6e2), U64_C(0xd1ae9f77e515e901),
+ U64_C(0xb7dc776a3f21b0ad), U64_C(0xaba6a1b96eb78098),
+ U64_C(0x9bcf4486248d9f5d), U64_C(0x582666c536455efd),
+ U64_C(0xfdbdac9bfeb9c6f1), U64_C(0xc47999be4163cdea),
+ U64_C(0x765540081722a7ef), U64_C(0x3e548ed8ec710751),
+ U64_C(0x3d041f67cb51bac2), U64_C(0x7958af71ac82d40a),
+ U64_C(0x36c9da5c047a78fe), U64_C(0xed9a048e33af38b2),
+ U64_C(0x26ee7249c96c86bd), U64_C(0x900281bdeba65d61),
+ U64_C(0x11172c8bd0fd9532), U64_C(0xea0abf73600434f8),
+ U64_C(0x42fc8f75299309f3), U64_C(0x34a9cf7d3eb1ae1c),
+ U64_C(0x2b838811480723ba), U64_C(0x5ce64c8742ceef24),
+ U64_C(0x1adae9b01fd6570e), U64_C(0x3c349bf9d6bad1b3),
+ U64_C(0x82453c891c7b75c0), U64_C(0x97923a40b80d512b),
+ U64_C(0x4a61dbf1c198765c), U64_C(0xb48ce6d518010d3e),
+ U64_C(0xcfb45c858e480fd6), U64_C(0xd933cbf30d1e96ae),
+ U64_C(0xd70ea014ab558e3a), U64_C(0xc189376228031742),
+ U64_C(0x9262949cd16d8b83), U64_C(0xeb3a3bed7def5f89),
+ U64_C(0x49314a4ee6b8cbcf), U64_C(0xdcc3652f647e4c06),
+ U64_C(0xda635a4c2a3e2b3d), U64_C(0x470c21a940f3d35b),
+ U64_C(0x315961a157d174b4), U64_C(0x6672e81dda3459ac),
+ U64_C(0x5b76f77a1165e36e), U64_C(0x445cb01667d36ec8),
+ U64_C(0xc5491d205c88a69b), U64_C(0x456c34887a3805b9),
+ U64_C(0xffddb9bac4721013), U64_C(0x99af51a71e4649bf),
+ U64_C(0xa15be01cbc7729d5), U64_C(0x52db2760e485f7b0),
+ U64_C(0x8c78576eba306d54), U64_C(0xae560f6507d75a30),
+ U64_C(0x95f22f6182c687c9), U64_C(0x71c5fbf54489aba5),
+ U64_C(0xca44f259e728d57e), U64_C(0x88b87d2ccebbdc8d),
+ U64_C(0xbab18d32be4a15aa), U64_C(0x8be8ec93e99b611e),
+ U64_C(0x17b713e89ebdf209), U64_C(0xb31c5d284baa0174),
+ U64_C(0xeeca9531148f8521), U64_C(0xb8d198138481c348),
+ U64_C(0x8988f9b2d350b7fc), U64_C(0xb9e11c8d996aa839),
+ U64_C(0x5a4673e40c8e881f), U64_C(0x1687977683569978),
+ U64_C(0xbf4123eed72acf02), U64_C(0x4ea1f1b3b513c785),
+ U64_C(0xe767452be16f91ff), U64_C(0x7505d1b730021a7c),
+ U64_C(0xa59bca5ec8fc980c), U64_C(0xad069eda20f7e7a3),
+ U64_C(0x38f4b1bba231606a), U64_C(0x60d2d77e94743e97),
+ U64_C(0x9affc0183966f42c), U64_C(0x248e6768f3a7505f),
+ U64_C(0xcdd449a4b483d934), U64_C(0x87b59255751baf68),
+ U64_C(0x1bea6d2e023d3c7f), U64_C(0x6b1f12455b5ffcab),
+ U64_C(0x743555292de9710d), U64_C(0xd8034f6d10f5fddf),
+ U64_C(0xc6198c9f7ba81b08), U64_C(0xbb8109aca3a17edb),
+ U64_C(0xfa2d1766ad12cabb), U64_C(0xc729080166437079),
+ U64_C(0x9c5fff7b77269317), U64_C(0x0000000000000000),
+ U64_C(0x15d706c9a47624eb), U64_C(0x6fdf38072fd44d72),
+ U64_C(0x5fb6dd3865ee52b7), U64_C(0xa33bf53d86bcff37),
+ U64_C(0xe657c1b5fc84fa8e), U64_C(0xaa962527735cebe9),
+ U64_C(0x39c43525bfda0b1b), U64_C(0x204e4d2a872ce186),
+ U64_C(0x7a083ece8ba26999), U64_C(0x554b9c9db72efbfa),
+ U64_C(0xb22cd9b656416a05), U64_C(0x96a2bedea5e63a5a),
+ U64_C(0x802529a826b0a322), U64_C(0x8115ad363b5bc853),
+ U64_C(0x8375b81701901eb1), U64_C(0x3069e53f4a3a1fc5),
+ U64_C(0xbd2136cfede119e0), U64_C(0x18bafc91251d81ec),
+ U64_C(0x1d4a524d4c7d5b44), U64_C(0x05f0aedc6960daa8),
+ U64_C(0x29e39d3072ccf558), U64_C(0x70f57f6b5962c0d4),
+ U64_C(0x989fd53903ad22ce), U64_C(0xf84d024797d91c59),
+ U64_C(0x547b1803aac5908b), U64_C(0xf0d056c37fd263f6),
+ U64_C(0xd56eb535919e58d8), U64_C(0x1c7ad6d351963035),
+ U64_C(0x2e7326cd2167f912), U64_C(0xac361a443d1c8cd2),
+ U64_C(0x697f076461942a49), U64_C(0x4b515f6fdc731d2d),
+ U64_C(0x8ad8680df4700a6f), U64_C(0x41ac1eca0eb3b460),
+ U64_C(0x7d988533d80965d3), U64_C(0xa8f6300649973d0b),
+ U64_C(0x7765c4960ac9cc9e), U64_C(0x7ca801adc5e20ea2),
+ U64_C(0xdea3700e5eb59ae4), U64_C(0xa06b6482a19c42a4),
+ U64_C(0x6a2f96db46b497da), U64_C(0x27def6d7d487edcc),
+ U64_C(0x463ca5375d18b82a), U64_C(0xa6cb5be1efdc259f),
+ U64_C(0x53eba3fef96e9cc1), U64_C(0xce84d81b93a364a7),
+ U64_C(0xf4107c810b59d22f), U64_C(0x333974806d1aa256),
+ U64_C(0x0f0def79bba073e5), U64_C(0x231edc95a00c5c15),
+ U64_C(0xe437d494c64f2c6c), U64_C(0x91320523f64d3610),
+ U64_C(0x67426c83c7df32dd), U64_C(0x6eefbc99323f2603),
+ U64_C(0x9d6f7be56acdf866), U64_C(0x5916e25b2bae358c),
+ U64_C(0x7ff89012e2c2b331), U64_C(0x035091bf2720bd93),
+ U64_C(0x561b0d22900e4669), U64_C(0x28d319ae6f279e29),
+ U64_C(0x2f43a2533c8c9263), U64_C(0xd09e1be9f8fe8270),
+ U64_C(0xf740ed3e2c796fbc), U64_C(0xdb53ded237d5404c),
+ U64_C(0x62b2c25faebfe875), U64_C(0x0afd41a5d2c0a94d),
+ U64_C(0x6412fd3ce0ff8f4e), U64_C(0xe3a76f6995e42026),
+ U64_C(0x6c8fa9b808f4f0e1), U64_C(0xc2d9a6dd0f23aad1),
+ U64_C(0x8f28c6d19d10d0c7), U64_C(0x85d587744fd0798a),
+ U64_C(0xa20b71a39b579446), U64_C(0x684f83fa7c7f4138),
+ U64_C(0xe507500adba4471d), U64_C(0x3f640a46f19a6c20),
+ U64_C(0x1247bd34f7dd28a1), U64_C(0x2d23b77206474481),
+ U64_C(0x93521002cc86e0f2), U64_C(0x572b89bc8de52d18),
+ U64_C(0xfb1d93f8b0f9a1ca), U64_C(0xe95a2ecc4724896b),
+ U64_C(0x3ba420048511ddf9), U64_C(0xd63e248ab6bee54b),
+ U64_C(0x5dd6c8195f258455), U64_C(0x06a03f634e40673b),
+ U64_C(0x1f2a476c76b68da6), U64_C(0x217ec9b49ac78af7),
+ U64_C(0xecaa80102e4453c3), U64_C(0x14e78257b99d4f9a) },
+ /* 3 */
+ { U64_C(0x20329b2cc87bba05), U64_C(0x4f5eb6f86546a531),
+ U64_C(0xd4f44775f751b6b1), U64_C(0x8266a47b850dfa8b),
+ U64_C(0xbb986aa15a6ca985), U64_C(0xc979eb08f9ae0f99),
+ U64_C(0x2da6f447a2375ea1), U64_C(0x1e74275dcd7d8576),
+ U64_C(0xbc20180a800bc5f8), U64_C(0xb4a2f701b2dc65be),
+ U64_C(0xe726946f981b6d66), U64_C(0x48e6c453bf21c94c),
+ U64_C(0x42cad9930f0a4195), U64_C(0xefa47b64aacccd20),
+ U64_C(0x71180a8960409a42), U64_C(0x8bb3329bf6a44e0c),
+ U64_C(0xd34c35de2d36dacc), U64_C(0xa92f5b7cbc23dc96),
+ U64_C(0xb31a85aa68bb09c3), U64_C(0x13e04836a73161d2),
+ U64_C(0xb24dfc4129c51d02), U64_C(0x8ae44b70b7da5acd),
+ U64_C(0xe671ed84d96579a7), U64_C(0xa4bb3417d66f3832),
+ U64_C(0x4572ab38d56d2de8), U64_C(0xb1b47761ea47215c),
+ U64_C(0xe81c09cf70aba15d), U64_C(0xffbdb872ce7f90ac),
+ U64_C(0xa8782297fd5dc857), U64_C(0x0d946f6b6a4ce4a4),
+ U64_C(0xe4df1f4f5b995138), U64_C(0x9ebc71edca8c5762),
+ U64_C(0x0a2c1dc0b02b88d9), U64_C(0x3b503c115d9d7b91),
+ U64_C(0xc64376a8111ec3a2), U64_C(0xcec199a323c963e4),
+ U64_C(0xdc76a87ec58616f7), U64_C(0x09d596e073a9b487),
+ U64_C(0x14583a9d7d560daf), U64_C(0xf4c6dc593f2a0cb4),
+ U64_C(0xdd21d19584f80236), U64_C(0x4a4836983ddde1d3),
+ U64_C(0xe58866a41ae745f9), U64_C(0xf591a5b27e541875),
+ U64_C(0x891dc05074586693), U64_C(0x5b068c651810a89e),
+ U64_C(0xa30346bc0c08544f), U64_C(0x3dbf3751c684032d),
+ U64_C(0x2a1e86ec785032dc), U64_C(0xf73f5779fca830ea),
+ U64_C(0xb60c05ca30204d21), U64_C(0x0cc316802b32f065),
+ U64_C(0x8770241bdd96be69), U64_C(0xb861e18199ee95db),
+ U64_C(0xf805cad91418fcd1), U64_C(0x29e70dccbbd20e82),
+ U64_C(0xc7140f435060d763), U64_C(0x0f3a9da0e8b0cc3b),
+ U64_C(0xa2543f574d76408e), U64_C(0xbd7761e1c175d139),
+ U64_C(0x4b1f4f737ca3f512), U64_C(0x6dc2df1f2fc137ab),
+ U64_C(0xf1d05c3967b14856), U64_C(0xa742bf3715ed046c),
+ U64_C(0x654030141d1697ed), U64_C(0x07b872abda676c7d),
+ U64_C(0x3ce84eba87fa17ec), U64_C(0xc1fb0403cb79afdf),
+ U64_C(0x3e46bc7105063f73), U64_C(0x278ae987121cd678),
+ U64_C(0xa1adb4778ef47cd0), U64_C(0x26dd906c5362c2b9),
+ U64_C(0x05168060589b44e2), U64_C(0xfbfc41f9d79ac08f),
+ U64_C(0x0e6de44ba9ced8fa), U64_C(0x9feb08068bf243a3),
+ U64_C(0x7b341749d06b129b), U64_C(0x229c69e74a87929a),
+ U64_C(0xe09ee6c4427c011b), U64_C(0x5692e30e725c4c3a),
+ U64_C(0xda99a33e5e9f6e4b), U64_C(0x353dd85af453a36b),
+ U64_C(0x25241b4c90e0fee7), U64_C(0x5de987258309d022),
+ U64_C(0xe230140fc0802984), U64_C(0x93281e86a0c0b3c6),
+ U64_C(0xf229d719a4337408), U64_C(0x6f6c2dd4ad3d1f34),
+ U64_C(0x8ea5b2fbae3f0aee), U64_C(0x8331dd90c473ee4a),
+ U64_C(0x346aa1b1b52db7aa), U64_C(0xdf8f235e06042aa9),
+ U64_C(0xcc6f6b68a1354b7b), U64_C(0x6c95a6f46ebf236a),
+ U64_C(0x52d31a856bb91c19), U64_C(0x1a35ded6d498d555),
+ U64_C(0xf37eaef2e54d60c9), U64_C(0x72e181a9a3c2a61c),
+ U64_C(0x98537aad51952fde), U64_C(0x16f6c856ffaa2530),
+ U64_C(0xd960281e9d1d5215), U64_C(0x3a0745fa1ce36f50),
+ U64_C(0x0b7b642bf1559c18), U64_C(0x59a87eae9aec8001),
+ U64_C(0x5e100c05408bec7c), U64_C(0x0441f98b19e55023),
+ U64_C(0xd70dcc5534d38aef), U64_C(0x927f676de1bea707),
+ U64_C(0x9769e70db925e3e5), U64_C(0x7a636ea29115065a),
+ U64_C(0x468b201816ef11b6), U64_C(0xab81a9b73edff409),
+ U64_C(0xc0ac7de88a07bb1e), U64_C(0x1f235eb68c0391b7),
+ U64_C(0x6056b074458dd30f), U64_C(0xbe8eeac102f7ed67),
+ U64_C(0xcd381283e04b5fba), U64_C(0x5cbefecec277c4e3),
+ U64_C(0xd21b4c356c48ce0d), U64_C(0x1019c31664b35d8c),
+ U64_C(0x247362a7d19eea26), U64_C(0xebe582efb3299d03),
+ U64_C(0x02aef2cb82fc289f), U64_C(0x86275df09ce8aaa8),
+ U64_C(0x28b07427faac1a43), U64_C(0x38a9b7319e1f47cf),
+ U64_C(0xc82e92e3b8d01b58), U64_C(0x06ef0b409b1978bc),
+ U64_C(0x62f842bfc771fb90), U64_C(0x9904034610eb3b1f),
+ U64_C(0xded85ab5477a3e68), U64_C(0x90d195a663428f98),
+ U64_C(0x5384636e2ac708d8), U64_C(0xcbd719c37b522706),
+ U64_C(0xae9729d76644b0eb), U64_C(0x7c8c65e20a0c7ee6),
+ U64_C(0x80c856b007f1d214), U64_C(0x8c0b40302cc32271),
+ U64_C(0xdbcedad51fe17a8a), U64_C(0x740e8ae938dbdea0),
+ U64_C(0xa615c6dc549310ad), U64_C(0x19cc55f6171ae90b),
+ U64_C(0x49b1bdb8fe5fdd8d), U64_C(0xed0a89af2830e5bf),
+ U64_C(0x6a7aadb4f5a65bd6), U64_C(0x7e22972988f05679),
+ U64_C(0xf952b3325566e810), U64_C(0x39fecedadf61530e),
+ U64_C(0x6101c99f04f3c7ce), U64_C(0x2e5f7f6761b562ff),
+ U64_C(0xf08725d226cf5c97), U64_C(0x63af3b54860fef51),
+ U64_C(0x8ff2cb10ef411e2f), U64_C(0x884ab9bb35267252),
+ U64_C(0x4df04433e7ba8dae), U64_C(0x9afd8866d3690741),
+ U64_C(0x66b9bb34de94abb3), U64_C(0x9baaf18d92171380),
+ U64_C(0x543c11c5f0a064a5), U64_C(0x17a1b1bdbed431f1),
+ U64_C(0xb5f58eeaf3a2717f), U64_C(0xc355f6c849858740),
+ U64_C(0xec5df044694ef17e), U64_C(0xd83751f5dc6346d4),
+ U64_C(0xfc4433520dfdacf2), U64_C(0x0000000000000000),
+ U64_C(0x5a51f58e596ebc5f), U64_C(0x3285aaf12e34cf16),
+ U64_C(0x8d5c39db6dbd36b0), U64_C(0x12b731dde64f7513),
+ U64_C(0x94906c2d7aa7dfbb), U64_C(0x302b583aacc8e789),
+ U64_C(0x9d45facd090e6b3c), U64_C(0x2165e2c78905aec4),
+ U64_C(0x68d45f7f775a7349), U64_C(0x189b2c1d5664fdca),
+ U64_C(0xe1c99f2f030215da), U64_C(0x6983269436246788),
+ U64_C(0x8489af3b1e148237), U64_C(0xe94b702431d5b59c),
+ U64_C(0x33d2d31a6f4adbd7), U64_C(0xbfd9932a4389f9a6),
+ U64_C(0xb0e30e8aab39359d), U64_C(0xd1e2c715afcaf253),
+ U64_C(0x150f43763c28196e), U64_C(0xc4ed846393e2eb3d),
+ U64_C(0x03f98b20c3823c5e), U64_C(0xfd134ab94c83b833),
+ U64_C(0x556b682eb1de7064), U64_C(0x36c4537a37d19f35),
+ U64_C(0x7559f30279a5ca61), U64_C(0x799ae58252973a04),
+ U64_C(0x9c12832648707ffd), U64_C(0x78cd9c6913e92ec5),
+ U64_C(0x1d8dac7d0effb928), U64_C(0x439da0784e745554),
+ U64_C(0x413352b3cc887dcb), U64_C(0xbacf134a1b12bd44),
+ U64_C(0x114ebafd25cd494d), U64_C(0x2f08068c20cb763e),
+ U64_C(0x76a07822ba27f63f), U64_C(0xeab2fb04f25789c2),
+ U64_C(0xe3676de481fe3d45), U64_C(0x1b62a73d95e6c194),
+ U64_C(0x641749ff5c68832c), U64_C(0xa5ec4dfc97112cf3),
+ U64_C(0xf6682e92bdd6242b), U64_C(0x3f11c59a44782bb2),
+ U64_C(0x317c21d1edb6f348), U64_C(0xd65ab5be75ad9e2e),
+ U64_C(0x6b2dd45fb4d84f17), U64_C(0xfaab381296e4d44e),
+ U64_C(0xd0b5befeeeb4e692), U64_C(0x0882ef0b32d7a046),
+ U64_C(0x512a91a5a83b2047), U64_C(0x963e9ee6f85bf724),
+ U64_C(0x4e09cf132438b1f0), U64_C(0x77f701c9fb59e2fe),
+ U64_C(0x7ddb1c094b726a27), U64_C(0x5f4775ee01f5f8bd),
+ U64_C(0x9186ec4d223c9b59), U64_C(0xfeeac1998f01846d),
+ U64_C(0xac39db1ce4b89874), U64_C(0xb75b7c21715e59e0),
+ U64_C(0xafc0503c273aa42a), U64_C(0x6e3b543fec430bf5),
+ U64_C(0x704f7362213e8e83), U64_C(0x58ff0745db9294c0),
+ U64_C(0x67eec2df9feabf72), U64_C(0xa0facd9ccf8a6811),
+ U64_C(0xb936986ad890811a), U64_C(0x95c715c63bd9cb7a),
+ U64_C(0xca8060283a2c33c7), U64_C(0x507de84ee9453486),
+ U64_C(0x85ded6d05f6a96f6), U64_C(0x1cdad5964f81ade9),
+ U64_C(0xd5a33e9eb62fa270), U64_C(0x40642b588df6690a),
+ U64_C(0x7f75eec2c98e42b8), U64_C(0x2cf18dace3494a60),
+ U64_C(0x23cb100c0bf9865b), U64_C(0xeef3028febb2d9e1),
+ U64_C(0x4425d2d394133929), U64_C(0xaad6d05c7fa1e0c8),
+ U64_C(0xad6ea2f7a5c68cb5), U64_C(0xc2028f2308fb9381),
+ U64_C(0x819f2f5b468fc6d5), U64_C(0xc5bafd88d29cfffc),
+ U64_C(0x47dc59f357910577), U64_C(0x2b49ff07392e261d),
+ U64_C(0x57c59ae5332258fb), U64_C(0x73b6f842e2bcb2dd),
+ U64_C(0xcf96e04862b77725), U64_C(0x4ca73dd8a6c4996f),
+ U64_C(0x015779eb417e14c1), U64_C(0x37932a9176af8bf4) },
+ /* 4 */
+ { U64_C(0x190a2c9b249df23e), U64_C(0x2f62f8b62263e1e9),
+ U64_C(0x7a7f754740993655), U64_C(0x330b7ba4d5564d9f),
+ U64_C(0x4c17a16a46672582), U64_C(0xb22f08eb7d05f5b8),
+ U64_C(0x535f47f40bc148cc), U64_C(0x3aec5d27d4883037),
+ U64_C(0x10ed0a1825438f96), U64_C(0x516101f72c233d17),
+ U64_C(0x13cc6f949fd04eae), U64_C(0x739853c441474bfd),
+ U64_C(0x653793d90d3f5b1b), U64_C(0x5240647b96b0fc2f),
+ U64_C(0x0c84890ad27623e0), U64_C(0xd7189b32703aaea3),
+ U64_C(0x2685de3523bd9c41), U64_C(0x99317c5b11bffefa),
+ U64_C(0x0d9baa854f079703), U64_C(0x70b93648fbd48ac5),
+ U64_C(0xa80441fce30bc6be), U64_C(0x7287704bdc36ff1e),
+ U64_C(0xb65384ed33dc1f13), U64_C(0xd36417343ee34408),
+ U64_C(0x39cd38ab6e1bf10f), U64_C(0x5ab861770a1f3564),
+ U64_C(0x0ebacf09f594563b), U64_C(0xd04572b884708530),
+ U64_C(0x3cae9722bdb3af47), U64_C(0x4a556b6f2f5cbaf2),
+ U64_C(0xe1704f1f76c4bd74), U64_C(0x5ec4ed7144c6dfcf),
+ U64_C(0x16afc01d4c7810e6), U64_C(0x283f113cd629ca7a),
+ U64_C(0xaf59a8761741ed2d), U64_C(0xeed5a3991e215fac),
+ U64_C(0x3bf37ea849f984d4), U64_C(0xe413e096a56ce33c),
+ U64_C(0x2c439d3a98f020d1), U64_C(0x637559dc6404c46b),
+ U64_C(0x9e6c95d1e5f5d569), U64_C(0x24bb9836045fe99a),
+ U64_C(0x44efa466dac8ecc9), U64_C(0xc6eab2a5c80895d6),
+ U64_C(0x803b50c035220cc4), U64_C(0x0321658cba93c138),
+ U64_C(0x8f9ebc465dc7ee1c), U64_C(0xd15a5137190131d3),
+ U64_C(0x0fa5ec8668e5e2d8), U64_C(0x91c979578d1037b1),
+ U64_C(0x0642ca05693b9f70), U64_C(0xefca80168350eb4f),
+ U64_C(0x38d21b24f36a45ec), U64_C(0xbeab81e1af73d658),
+ U64_C(0x8cbfd9cae7542f24), U64_C(0xfd19cc0d81f11102),
+ U64_C(0x0ac6430fbb4dbc90), U64_C(0x1d76a09d6a441895),
+ U64_C(0x2a01573ff1cbbfa1), U64_C(0xb572e161894fde2b),
+ U64_C(0x8124734fa853b827), U64_C(0x614b1fdf43e6b1b0),
+ U64_C(0x68ac395c4238cc18), U64_C(0x21d837bfd7f7b7d2),
+ U64_C(0x20c714304a860331), U64_C(0x5cfaab726324aa14),
+ U64_C(0x74c5ba4eb50d606e), U64_C(0xf3a3030474654739),
+ U64_C(0x23e671bcf015c209), U64_C(0x45f087e947b9582a),
+ U64_C(0xd8bd77b418df4c7b), U64_C(0xe06f6c90ebb50997),
+ U64_C(0x0bd96080263c0873), U64_C(0x7e03f9410e40dcfe),
+ U64_C(0xb8e94be4c6484928), U64_C(0xfb5b0608e8ca8e72),
+ U64_C(0x1a2b49179e0e3306), U64_C(0x4e29e76961855059),
+ U64_C(0x4f36c4e6fcf4e4ba), U64_C(0x49740ee395cf7bca),
+ U64_C(0xc2963ea386d17f7d), U64_C(0x90d65ad810618352),
+ U64_C(0x12d34c1b02a1fa4d), U64_C(0xfa44258775bb3a91),
+ U64_C(0x18150f14b9ec46dd), U64_C(0x1491861e6b9a653d),
+ U64_C(0x9a1019d7ab2c3fc2), U64_C(0x3668d42d06fe13d7),
+ U64_C(0xdcc1fbb25606a6d0), U64_C(0x969490dd795a1c22),
+ U64_C(0x3549b1a1bc6dd2ef), U64_C(0xc94f5e23a0ed770e),
+ U64_C(0xb9f6686b5b39fdcb), U64_C(0xc4d4f4a6efeae00d),
+ U64_C(0xe732851a1fff2204), U64_C(0x94aad6de5eb869f9),
+ U64_C(0x3f8ff2ae07206e7f), U64_C(0xfe38a9813b62d03a),
+ U64_C(0xa7a1ad7a8bee2466), U64_C(0x7b6056c8dde882b6),
+ U64_C(0x302a1e286fc58ca7), U64_C(0x8da0fa457a259bc7),
+ U64_C(0xb3302b64e074415b), U64_C(0x5402ae7eff8b635f),
+ U64_C(0x08f8050c9cafc94b), U64_C(0xae468bf98a3059ce),
+ U64_C(0x88c355cca98dc58f), U64_C(0xb10e6d67c7963480),
+ U64_C(0xbad70de7e1aa3cf3), U64_C(0xbfb4a26e320262bb),
+ U64_C(0xcb711820870f02d5), U64_C(0xce12b7a954a75c9d),
+ U64_C(0x563ce87dd8691684), U64_C(0x9f73b65e7884618a),
+ U64_C(0x2b1e74b06cba0b42), U64_C(0x47cec1ea605b2df1),
+ U64_C(0x1c698312f735ac76), U64_C(0x5fdbcefed9b76b2c),
+ U64_C(0x831a354c8fb1cdfc), U64_C(0x820516c312c0791f),
+ U64_C(0xb74ca762aeadabf0), U64_C(0xfc06ef821c80a5e1),
+ U64_C(0x5723cbf24518a267), U64_C(0x9d4df05d5f661451),
+ U64_C(0x588627742dfd40bf), U64_C(0xda8331b73f3d39a0),
+ U64_C(0x17b0e392d109a405), U64_C(0xf965400bcf28fba9),
+ U64_C(0x7c3dbf4229a2a925), U64_C(0x023e460327e275db),
+ U64_C(0x6cd0b55a0ce126b3), U64_C(0xe62da695828e96e7),
+ U64_C(0x42ad6e63b3f373b9), U64_C(0xe50cc319381d57df),
+ U64_C(0xc5cbd729729b54ee), U64_C(0x46d1e265fd2a9912),
+ U64_C(0x6428b056904eeff8), U64_C(0x8be23040131e04b7),
+ U64_C(0x6709d5da2add2ec0), U64_C(0x075de98af44a2b93),
+ U64_C(0x8447dcc67bfbe66f), U64_C(0x6616f655b7ac9a23),
+ U64_C(0xd607b8bded4b1a40), U64_C(0x0563af89d3a85e48),
+ U64_C(0x3db1b4ad20c21ba4), U64_C(0x11f22997b8323b75),
+ U64_C(0x292032b34b587e99), U64_C(0x7f1cdace9331681d),
+ U64_C(0x8e819fc9c0b65aff), U64_C(0xa1e3677fe2d5bb16),
+ U64_C(0xcd33d225ee349da5), U64_C(0xd9a2543b85aef898),
+ U64_C(0x795e10cbfa0af76d), U64_C(0x25a4bbb9992e5d79),
+ U64_C(0x78413344677b438e), U64_C(0xf0826688cef68601),
+ U64_C(0xd27b34bba392f0eb), U64_C(0x551d8df162fad7bc),
+ U64_C(0x1e57c511d0d7d9ad), U64_C(0xdeffbdb171e4d30b),
+ U64_C(0xf4feea8e802f6caa), U64_C(0xa480c8f6317de55e),
+ U64_C(0xa0fc44f07fa40ff5), U64_C(0x95b5f551c3c9dd1a),
+ U64_C(0x22f952336d6476ea), U64_C(0x0000000000000000),
+ U64_C(0xa6be8ef5169f9085), U64_C(0xcc2cf1aa73452946),
+ U64_C(0x2e7ddb39bf12550a), U64_C(0xd526dd3157d8db78),
+ U64_C(0x486b2d6c08becf29), U64_C(0x9b0f3a58365d8b21),
+ U64_C(0xac78cdfaadd22c15), U64_C(0xbc95c7e28891a383),
+ U64_C(0x6a927f5f65dab9c3), U64_C(0xc3891d2c1ba0cb9e),
+ U64_C(0xeaa92f9f50f8b507), U64_C(0xcf0d9426c9d6e87e),
+ U64_C(0xca6e3baf1a7eb636), U64_C(0xab25247059980786),
+ U64_C(0x69b31ad3df4978fb), U64_C(0xe2512a93cc577c4c),
+ U64_C(0xff278a0ea61364d9), U64_C(0x71a615c766a53e26),
+ U64_C(0x89dc764334fc716c), U64_C(0xf87a638452594f4a),
+ U64_C(0xf2bc208be914f3da), U64_C(0x8766b94ac1682757),
+ U64_C(0xbbc82e687cdb8810), U64_C(0x626a7a53f9757088),
+ U64_C(0xa2c202f358467a2e), U64_C(0x4d0882e5db169161),
+ U64_C(0x09e7268301de7da8), U64_C(0xe897699c771ac0dc),
+ U64_C(0xc8507dac3d9cc3ed), U64_C(0xc0a878a0a1330aa6),
+ U64_C(0x978bb352e42ba8c1), U64_C(0xe9884a13ea6b743f),
+ U64_C(0x279afdbabecc28a2), U64_C(0x047c8c064ed9eaab),
+ U64_C(0x507e2278b15289f4), U64_C(0x599904fbb08cf45c),
+ U64_C(0xbd8ae46d15e01760), U64_C(0x31353da7f2b43844),
+ U64_C(0x8558ff49e68a528c), U64_C(0x76fbfc4d92ef15b5),
+ U64_C(0x3456922e211c660c), U64_C(0x86799ac55c1993b4),
+ U64_C(0x3e90d1219a51da9c), U64_C(0x2d5cbeb505819432),
+ U64_C(0x982e5fd48cce4a19), U64_C(0xdb9c1238a24c8d43),
+ U64_C(0xd439febecaa96f9b), U64_C(0x418c0bef0960b281),
+ U64_C(0x158ea591f6ebd1de), U64_C(0x1f48e69e4da66d4e),
+ U64_C(0x8afd13cf8e6fb054), U64_C(0xf5e1c9011d5ed849),
+ U64_C(0xe34e091c5126c8af), U64_C(0xad67ee7530a398f6),
+ U64_C(0x43b24dec2e82c75a), U64_C(0x75da99c1287cd48d),
+ U64_C(0x92e81cdb3783f689), U64_C(0xa3dd217cc537cecd),
+ U64_C(0x60543c50de970553), U64_C(0x93f73f54aaf2426a),
+ U64_C(0xa91b62737e7a725d), U64_C(0xf19d4507538732e2),
+ U64_C(0x77e4dfc20f9ea156), U64_C(0x7d229ccdb4d31dc6),
+ U64_C(0x1b346a98037f87e5), U64_C(0xedf4c615a4b29e94),
+ U64_C(0x4093286094110662), U64_C(0xb0114ee85ae78063),
+ U64_C(0x6ff1d0d6b672e78b), U64_C(0x6dcf96d591909250),
+ U64_C(0xdfe09e3eec9567e8), U64_C(0x3214582b4827f97c),
+ U64_C(0xb46dc2ee143e6ac8), U64_C(0xf6c0ac8da7cd1971),
+ U64_C(0xebb60c10cd8901e4), U64_C(0xf7df8f023abcad92),
+ U64_C(0x9c52d3d2c217a0b2), U64_C(0x6b8d5cd0f8ab0d20),
+ U64_C(0x3777f7a29b8fa734), U64_C(0x011f238f9d71b4e3),
+ U64_C(0xc1b75b2f3c42be45), U64_C(0x5de588fdfe551ef7),
+ U64_C(0x6eeef3592b035368), U64_C(0xaa3a07ffc4e9b365),
+ U64_C(0xecebe59a39c32a77), U64_C(0x5ba742f8976e8187),
+ U64_C(0x4b4a48e0b22d0e11), U64_C(0xddded83dcb771233),
+ U64_C(0xa59feb79ac0c51bd), U64_C(0xc7f5912a55792135) },
+ /* 5 */
+ { U64_C(0x6d6ae04668a9b08a), U64_C(0x3ab3f04b0be8c743),
+ U64_C(0xe51e166b54b3c908), U64_C(0xbe90a9eb35c2f139),
+ U64_C(0xb2c7066637f2bec1), U64_C(0xaa6945613392202c),
+ U64_C(0x9a28c36f3b5201eb), U64_C(0xddce5a93ab536994),
+ U64_C(0x0e34133ef6382827), U64_C(0x52a02ba1ec55048b),
+ U64_C(0xa2f88f97c4b2a177), U64_C(0x8640e513ca2251a5),
+ U64_C(0xcdf1d36258137622), U64_C(0xfe6cb708dedf8ddb),
+ U64_C(0x8a174a9ec8121e5d), U64_C(0x679896036b81560e),
+ U64_C(0x59ed033395795fee), U64_C(0x1dd778ab8b74edaf),
+ U64_C(0xee533ef92d9f926d), U64_C(0x2a8c79baf8a8d8f5),
+ U64_C(0x6bcf398e69b119f6), U64_C(0xe20491742fafdd95),
+ U64_C(0x276488e0809c2aec), U64_C(0xea955b82d88f5cce),
+ U64_C(0x7102c63a99d9e0c4), U64_C(0xf9763017a5c39946),
+ U64_C(0x429fa2501f151b3d), U64_C(0x4659c72bea05d59e),
+ U64_C(0x984b7fdccf5a6634), U64_C(0xf742232953fbb161),
+ U64_C(0x3041860e08c021c7), U64_C(0x747bfd9616cd9386),
+ U64_C(0x4bb1367192312787), U64_C(0x1b72a1638a6c44d3),
+ U64_C(0x4a0e68a6e8359a66), U64_C(0x169a5039f258b6ca),
+ U64_C(0xb98a2ef44edee5a4), U64_C(0xd9083fe85e43a737),
+ U64_C(0x967f6ce239624e13), U64_C(0x8874f62d3c1a7982),
+ U64_C(0x3c1629830af06e3f), U64_C(0x9165ebfd427e5a8e),
+ U64_C(0xb5dd81794ceeaa5c), U64_C(0x0de8f15a7834f219),
+ U64_C(0x70bd98ede3dd5d25), U64_C(0xaccc9ca9328a8950),
+ U64_C(0x56664eda1945ca28), U64_C(0x221db34c0f8859ae),
+ U64_C(0x26dbd637fa98970d), U64_C(0x1acdffb4f068f932),
+ U64_C(0x4585254f64090fa0), U64_C(0x72de245e17d53afa),
+ U64_C(0x1546b25d7c546cf4), U64_C(0x207e0ffffb803e71),
+ U64_C(0xfaaad2732bcf4378), U64_C(0xb462dfae36ea17bd),
+ U64_C(0xcf926fd1ac1b11fd), U64_C(0xe0672dc7dba7ba4a),
+ U64_C(0xd3fa49ad5d6b41b3), U64_C(0x8ba81449b216a3bc),
+ U64_C(0x14f9ec8a0650d115), U64_C(0x40fc1ee3eb1d7ce2),
+ U64_C(0x23a2ed9b758ce44f), U64_C(0x782c521b14fddc7e),
+ U64_C(0x1c68267cf170504e), U64_C(0xbcf31558c1ca96e6),
+ U64_C(0xa781b43b4ba6d235), U64_C(0xf6fd7dfe29ff0c80),
+ U64_C(0xb0a4bad5c3fad91e), U64_C(0xd199f51ea963266c),
+ U64_C(0x414340349119c103), U64_C(0x5405f269ed4dadf7),
+ U64_C(0xabd61bb649969dcd), U64_C(0x6813dbeae7bdc3c8),
+ U64_C(0x65fb2ab09f8931d1), U64_C(0xf1e7fae152e3181d),
+ U64_C(0xc1a67cef5a2339da), U64_C(0x7a4feea8e0f5bba1),
+ U64_C(0x1e0b9acf05783791), U64_C(0x5b8ebf8061713831),
+ U64_C(0x80e53cdbcb3af8d9), U64_C(0x7e898bd315e57502),
+ U64_C(0xc6bcfbf0213f2d47), U64_C(0x95a38e86b76e942d),
+ U64_C(0x092e94218d243cba), U64_C(0x8339debf453622e7),
+ U64_C(0xb11be402b9fe64ff), U64_C(0x57d9100d634177c9),
+ U64_C(0xcc4e8db52217cbc3), U64_C(0x3b0cae9c71ec7aa2),
+ U64_C(0xfb158ca451cbfe99), U64_C(0x2b33276d82ac6514),
+ U64_C(0x01bf5ed77a04bde1), U64_C(0xc5601994af33f779),
+ U64_C(0x75c4a3416cc92e67), U64_C(0xf3844652a6eb7fc2),
+ U64_C(0x3487e375fdd0ef64), U64_C(0x18ae430704609eed),
+ U64_C(0x4d14efb993298efb), U64_C(0x815a620cb13e4538),
+ U64_C(0x125c354207487869), U64_C(0x9eeea614ce42cf48),
+ U64_C(0xce2d3106d61fac1c), U64_C(0xbbe99247bad6827b),
+ U64_C(0x071a871f7b1c149d), U64_C(0x2e4a1cc10db81656),
+ U64_C(0x77a71ff298c149b8), U64_C(0x06a5d9c80118a97c),
+ U64_C(0xad73c27e488e34b1), U64_C(0x443a7b981e0db241),
+ U64_C(0xe3bbcfa355ab6074), U64_C(0x0af276450328e684),
+ U64_C(0x73617a896dd1871b), U64_C(0x58525de4ef7de20f),
+ U64_C(0xb7be3dcab8e6cd83), U64_C(0x19111dd07e64230c),
+ U64_C(0x842359a03e2a367a), U64_C(0x103f89f1f3401fb6),
+ U64_C(0xdc710444d157d475), U64_C(0xb835702334da5845),
+ U64_C(0x4320fc876511a6dc), U64_C(0xd026abc9d3679b8d),
+ U64_C(0x17250eee885c0b2b), U64_C(0x90dab52a387ae76f),
+ U64_C(0x31fed8d972c49c26), U64_C(0x89cba8fa461ec463),
+ U64_C(0x2ff5421677bcabb7), U64_C(0x396f122f85e41d7d),
+ U64_C(0xa09b332430bac6a8), U64_C(0xc888e8ced7070560),
+ U64_C(0xaeaf201ac682ee8f), U64_C(0x1180d7268944a257),
+ U64_C(0xf058a43628e7a5fc), U64_C(0xbd4c4b8fbbce2b07),
+ U64_C(0xa1246df34abe7b49), U64_C(0x7d5569b79be9af3c),
+ U64_C(0xa9b5a705bd9efa12), U64_C(0xdb6b835baa4bc0e8),
+ U64_C(0x05793bac8f147342), U64_C(0x21c1512881848390),
+ U64_C(0xfdb0556c50d357e5), U64_C(0x613d4fcb6a99ff72),
+ U64_C(0x03dce2648e0cda3e), U64_C(0xe949b9e6568386f0),
+ U64_C(0xfc0f0bbb2ad7ea04), U64_C(0x6a70675913b5a417),
+ U64_C(0x7f36d5046fe1c8e3), U64_C(0x0c57af8d02304ff8),
+ U64_C(0x32223abdfcc84618), U64_C(0x0891caf6f720815b),
+ U64_C(0xa63eeaec31a26fd4), U64_C(0x2507345374944d33),
+ U64_C(0x49d28ac266394058), U64_C(0xf5219f9aa7f3d6be),
+ U64_C(0x2d96fea583b4cc68), U64_C(0x5a31e1571b7585d0),
+ U64_C(0x8ed12fe53d02d0fe), U64_C(0xdfade6205f5b0e4b),
+ U64_C(0x4cabb16ee92d331a), U64_C(0x04c6657bf510cea3),
+ U64_C(0xd73c2cd6a87b8f10), U64_C(0xe1d87310a1a307ab),
+ U64_C(0x6cd5be9112ad0d6b), U64_C(0x97c032354366f3f2),
+ U64_C(0xd4e0ceb22677552e), U64_C(0x0000000000000000),
+ U64_C(0x29509bde76a402cb), U64_C(0xc27a9e8bd42fe3e4),
+ U64_C(0x5ef7842cee654b73), U64_C(0xaf107ecdbc86536e),
+ U64_C(0x3fcacbe784fcb401), U64_C(0xd55f90655c73e8cf),
+ U64_C(0xe6c2f40fdabf1336), U64_C(0xe8f6e7312c873b11),
+ U64_C(0xeb2a0555a28be12f), U64_C(0xe4a148bc2eb774e9),
+ U64_C(0x9b979db84156bc0a), U64_C(0x6eb60222e6a56ab4),
+ U64_C(0x87ffbbc4b026ec44), U64_C(0xc703a5275b3b90a6),
+ U64_C(0x47e699fc9001687f), U64_C(0x9c8d1aa73a4aa897),
+ U64_C(0x7cea3760e1ed12dd), U64_C(0x4ec80ddd1d2554c5),
+ U64_C(0x13e36b957d4cc588), U64_C(0x5d2b66486069914d),
+ U64_C(0x92b90999cc7280b0), U64_C(0x517cc9c56259deb5),
+ U64_C(0xc937b619ad03b881), U64_C(0xec30824ad997f5b2),
+ U64_C(0xa45d565fc5aa080b), U64_C(0xd6837201d27f32f1),
+ U64_C(0x635ef3789e9198ad), U64_C(0x531f75769651b96a),
+ U64_C(0x4f77530a6721e924), U64_C(0x486dd4151c3dfdb9),
+ U64_C(0x5f48dafb9461f692), U64_C(0x375b011173dc355a),
+ U64_C(0x3da9775470f4d3de), U64_C(0x8d0dcd81b30e0ac0),
+ U64_C(0x36e45fc609d888bb), U64_C(0x55baacbe97491016),
+ U64_C(0x8cb29356c90ab721), U64_C(0x76184125e2c5f459),
+ U64_C(0x99f4210bb55edbd5), U64_C(0x6f095cf59ca1d755),
+ U64_C(0x9f51f8c3b44672a9), U64_C(0x3538bda287d45285),
+ U64_C(0x50c39712185d6354), U64_C(0xf23b1885dcefc223),
+ U64_C(0x79930ccc6ef9619f), U64_C(0xed8fdc9da3934853),
+ U64_C(0xcb540aaa590bdf5e), U64_C(0x5c94389f1a6d2cac),
+ U64_C(0xe77daad8a0bbaed7), U64_C(0x28efc5090ca0bf2a),
+ U64_C(0xbf2ff73c4fc64cd8), U64_C(0xb37858b14df60320),
+ U64_C(0xf8c96ec0dfc724a7), U64_C(0x828680683f329f06),
+ U64_C(0x941cd051cd6a29cc), U64_C(0xc3c5c05cae2b5e05),
+ U64_C(0xb601631dc2e27062), U64_C(0xc01922382027843b),
+ U64_C(0x24b86a840e90f0d2), U64_C(0xd245177a276ffc52),
+ U64_C(0x0f8b4de98c3c95c6), U64_C(0x3e759530fef809e0),
+ U64_C(0x0b4d2892792c5b65), U64_C(0xc4df4743d5374a98),
+ U64_C(0xa5e20888bfaeb5ea), U64_C(0xba56cc90c0d23f9a),
+ U64_C(0x38d04cf8ffe0a09c), U64_C(0x62e1adafe495254c),
+ U64_C(0x0263bcb3f40867df), U64_C(0xcaeb547d230f62bf),
+ U64_C(0x6082111c109d4293), U64_C(0xdad4dd8cd04f7d09),
+ U64_C(0xefec602e579b2f8c), U64_C(0x1fb4c4187f7c8a70),
+ U64_C(0xffd3e9dfa4db303a), U64_C(0x7bf0b07f9af10640),
+ U64_C(0xf49ec14dddf76b5f), U64_C(0x8f6e713247066d1f),
+ U64_C(0x339d646a86ccfbf9), U64_C(0x64447467e58d8c30),
+ U64_C(0x2c29a072f9b07189), U64_C(0xd8b7613f24471ad6),
+ U64_C(0x6627c8d41185ebef), U64_C(0xa347d140beb61c96),
+ U64_C(0xde12b8f7255fb3aa), U64_C(0x9d324470404e1576),
+ U64_C(0x9306574eb6763d51), U64_C(0xa80af9d2c79a47f3),
+ U64_C(0x859c0777442e8b9b), U64_C(0x69ac853d9db97e29) },
+ /* 6 */
+ { U64_C(0xc3407dfc2de6377e), U64_C(0x5b9e93eea4256f77),
+ U64_C(0xadb58fdd50c845e0), U64_C(0x5219ff11a75bed86),
+ U64_C(0x356b61cfd90b1de9), U64_C(0xfb8f406e25abe037),
+ U64_C(0x7a5a0231c0f60796), U64_C(0x9d3cd216e1f5020b),
+ U64_C(0x0c6550fb6b48d8f3), U64_C(0xf57508c427ff1c62),
+ U64_C(0x4ad35ffa71cb407d), U64_C(0x6290a2da1666aa6d),
+ U64_C(0xe284ec2349355f9f), U64_C(0xb3c307c53d7c84ec),
+ U64_C(0x05e23c0468365a02), U64_C(0x190bac4d6c9ebfa8),
+ U64_C(0x94bbbee9e28b80fa), U64_C(0xa34fc777529cb9b5),
+ U64_C(0xcc7b39f095bcd978), U64_C(0x2426addb0ce532e3),
+ U64_C(0x7e79329312ce4fc7), U64_C(0xab09a72eebec2917),
+ U64_C(0xf8d15499f6b9d6c2), U64_C(0x1a55b8babf8c895d),
+ U64_C(0xdb8add17fb769a85), U64_C(0xb57f2f368658e81b),
+ U64_C(0x8acd36f18f3f41f6), U64_C(0x5ce3b7bba50f11d3),
+ U64_C(0x114dcc14d5ee2f0a), U64_C(0xb91a7fcded1030e8),
+ U64_C(0x81d5425fe55de7a1), U64_C(0xb6213bc1554adeee),
+ U64_C(0x80144ef95f53f5f2), U64_C(0x1e7688186db4c10c),
+ U64_C(0x3b912965db5fe1bc), U64_C(0xc281715a97e8252d),
+ U64_C(0x54a5d7e21c7f8171), U64_C(0x4b12535ccbc5522e),
+ U64_C(0x1d289cefbea6f7f9), U64_C(0x6ef5f2217d2e729e),
+ U64_C(0xe6a7dc819b0d17ce), U64_C(0x1b94b41c05829b0e),
+ U64_C(0x33d7493c622f711e), U64_C(0xdcf7f942fa5ce421),
+ U64_C(0x600fba8b7f7a8ecb), U64_C(0x46b60f011a83988e),
+ U64_C(0x235b898e0dcf4c47), U64_C(0x957ab24f588592a9),
+ U64_C(0x4354330572b5c28c), U64_C(0xa5f3ef84e9b8d542),
+ U64_C(0x8c711e02341b2d01), U64_C(0x0b1874ae6a62a657),
+ U64_C(0x1213d8e306fc19ff), U64_C(0xfe6d7c6a4d9dba35),
+ U64_C(0x65ed868f174cd4c9), U64_C(0x88522ea0e6236550),
+ U64_C(0x899322065c2d7703), U64_C(0xc01e690bfef4018b),
+ U64_C(0x915982ed8abddaf8), U64_C(0xbe675b98ec3a4e4c),
+ U64_C(0xa996bf7f82f00db1), U64_C(0xe1daf8d49a27696a),
+ U64_C(0x2effd5d3dc8986e7), U64_C(0xd153a51f2b1a2e81),
+ U64_C(0x18caa0ebd690adfb), U64_C(0x390e3134b243c51a),
+ U64_C(0x2778b92cdff70416), U64_C(0x029f1851691c24a6),
+ U64_C(0x5e7cafeacc133575), U64_C(0xfa4e4cc89fa5f264),
+ U64_C(0x5a5f9f481e2b7d24), U64_C(0x484c47ab18d764db),
+ U64_C(0x400a27f2a1a7f479), U64_C(0xaeeb9b2a83da7315),
+ U64_C(0x721c626879869734), U64_C(0x042330a2d2384851),
+ U64_C(0x85f672fd3765aff0), U64_C(0xba446b3a3e02061d),
+ U64_C(0x73dd6ecec3888567), U64_C(0xffac70ccf793a866),
+ U64_C(0xdfa9edb5294ed2d4), U64_C(0x6c6aea7014325638),
+ U64_C(0x834a5a0e8c41c307), U64_C(0xcdba35562fb2cb2b),
+ U64_C(0x0ad97808d06cb404), U64_C(0x0f3b440cb85aee06),
+ U64_C(0xe5f9c876481f213b), U64_C(0x98deee1289c35809),
+ U64_C(0x59018bbfcd394bd1), U64_C(0xe01bf47220297b39),
+ U64_C(0xde68e1139340c087), U64_C(0x9fa3ca4788e926ad),
+ U64_C(0xbb85679c840c144e), U64_C(0x53d8f3b71d55ffd5),
+ U64_C(0x0da45c5dd146caa0), U64_C(0x6f34fe87c72060cd),
+ U64_C(0x57fbc315cf6db784), U64_C(0xcee421a1fca0fdde),
+ U64_C(0x3d2d0196607b8d4b), U64_C(0x642c8a29ad42c69a),
+ U64_C(0x14aff010bdd87508), U64_C(0xac74837beac657b3),
+ U64_C(0x3216459ad821634d), U64_C(0x3fb219c70967a9ed),
+ U64_C(0x06bc28f3bb246cf7), U64_C(0xf2082c9126d562c6),
+ U64_C(0x66b39278c45ee23c), U64_C(0xbd394f6f3f2878b9),
+ U64_C(0xfd33689d9e8f8cc0), U64_C(0x37f4799eb017394f),
+ U64_C(0x108cc0b26fe03d59), U64_C(0xda4bd1b1417888d6),
+ U64_C(0xb09d1332ee6eb219), U64_C(0x2f3ed975668794b4),
+ U64_C(0x58c0871977375982), U64_C(0x7561463d78ace990),
+ U64_C(0x09876cff037e82f1), U64_C(0x7fb83e35a8c05d94),
+ U64_C(0x26b9b58a65f91645), U64_C(0xef20b07e9873953f),
+ U64_C(0x3148516d0b3355b8), U64_C(0x41cb2b541ba9e62a),
+ U64_C(0x790416c613e43163), U64_C(0xa011d380818e8f40),
+ U64_C(0x3a5025c36151f3ef), U64_C(0xd57095bdf92266d0),
+ U64_C(0x498d4b0da2d97688), U64_C(0x8b0c3a57353153a5),
+ U64_C(0x21c491df64d368e1), U64_C(0x8f2f0af5e7091bf4),
+ U64_C(0x2da1c1240f9bb012), U64_C(0xc43d59a92ccc49da),
+ U64_C(0xbfa6573e56345c1f), U64_C(0x828b56a8364fd154),
+ U64_C(0x9a41f643e0df7caf), U64_C(0xbcf843c985266aea),
+ U64_C(0x2b1de9d7b4bfdce5), U64_C(0x20059d79dedd7ab2),
+ U64_C(0x6dabe6d6ae3c446b), U64_C(0x45e81bf6c991ae7b),
+ U64_C(0x6351ae7cac68b83e), U64_C(0xa432e32253b6c711),
+ U64_C(0xd092a9b991143cd2), U64_C(0xcac711032e98b58f),
+ U64_C(0xd8d4c9e02864ac70), U64_C(0xc5fc550f96c25b89),
+ U64_C(0xd7ef8dec903e4276), U64_C(0x67729ede7e50f06f),
+ U64_C(0xeac28c7af045cf3d), U64_C(0xb15c1f945460a04a),
+ U64_C(0x9cfddeb05bfb1058), U64_C(0x93c69abce3a1fe5e),
+ U64_C(0xeb0380dc4a4bdd6e), U64_C(0xd20db1e8f8081874),
+ U64_C(0x229a8528b7c15e14), U64_C(0x44291750739fbc28),
+ U64_C(0xd3ccbd4e42060a27), U64_C(0xf62b1c33f4ed2a97),
+ U64_C(0x86a8660ae4779905), U64_C(0xd62e814a2a305025),
+ U64_C(0x477703a7a08d8add), U64_C(0x7b9b0e977af815c5),
+ U64_C(0x78c51a60a9ea2330), U64_C(0xa6adfb733aaae3b7),
+ U64_C(0x97e5aa1e3199b60f), U64_C(0x0000000000000000),
+ U64_C(0xf4b404629df10e31), U64_C(0x5564db44a6719322),
+ U64_C(0x9207961a59afec0d), U64_C(0x9624a6b88b97a45c),
+ U64_C(0x363575380a192b1c), U64_C(0x2c60cd82b595a241),
+ U64_C(0x7d272664c1dc7932), U64_C(0x7142769faa94a1c1),
+ U64_C(0xa1d0df263b809d13), U64_C(0x1630e841d4c451ae),
+ U64_C(0xc1df65ad44fa13d8), U64_C(0x13d2d445bcf20bac),
+ U64_C(0xd915c546926abe23), U64_C(0x38cf3d92084dd749),
+ U64_C(0xe766d0272103059d), U64_C(0xc7634d5effde7f2f),
+ U64_C(0x077d2455012a7ea4), U64_C(0xedbfa82ff16fb199),
+ U64_C(0xaf2a978c39d46146), U64_C(0x42953fa3c8bbd0df),
+ U64_C(0xcb061da59496a7dc), U64_C(0x25e7a17db6eb20b0),
+ U64_C(0x34aa6d6963050fba), U64_C(0xa76cf7d580a4f1e4),
+ U64_C(0xf7ea10954ee338c4), U64_C(0xfcf2643b24819e93),
+ U64_C(0xcf252d0746aeef8d), U64_C(0x4ef06f58a3f3082c),
+ U64_C(0x563acfb37563a5d7), U64_C(0x5086e740ce47c920),
+ U64_C(0x2982f186dda3f843), U64_C(0x87696aac5e798b56),
+ U64_C(0x5d22bb1d1f010380), U64_C(0x035e14f7d31236f5),
+ U64_C(0x3cec0d30da759f18), U64_C(0xf3c920379cdb7095),
+ U64_C(0xb8db736b571e22bb), U64_C(0xdd36f5e44052f672),
+ U64_C(0xaac8ab8851e23b44), U64_C(0xa857b3d938fe1fe2),
+ U64_C(0x17f1e4e76eca43fd), U64_C(0xec7ea4894b61a3ca),
+ U64_C(0x9e62c6e132e734fe), U64_C(0xd4b1991b432c7483),
+ U64_C(0x6ad6c283af163acf), U64_C(0x1ce9904904a8e5aa),
+ U64_C(0x5fbda34c761d2726), U64_C(0xf910583f4cb7c491),
+ U64_C(0xc6a241f845d06d7c), U64_C(0x4f3163fe19fd1a7f),
+ U64_C(0xe99c988d2357f9c8), U64_C(0x8eee06535d0709a7),
+ U64_C(0x0efa48aa0254fc55), U64_C(0xb4be23903c56fa48),
+ U64_C(0x763f52caabbedf65), U64_C(0xeee1bcd8227d876c),
+ U64_C(0xe345e085f33b4dcc), U64_C(0x3e731561b369bbbe),
+ U64_C(0x2843fd2067adea10), U64_C(0x2adce5710eb1ceb6),
+ U64_C(0xb7e03767ef44ccbd), U64_C(0x8db012a48e153f52),
+ U64_C(0x61ceb62dc5749c98), U64_C(0xe85d942b9959eb9b),
+ U64_C(0x4c6f7709caef2c8a), U64_C(0x84377e5b8d6bbda3),
+ U64_C(0x30895dcbb13d47eb), U64_C(0x74a04a9bc2a2fbc3),
+ U64_C(0x6b17ce251518289c), U64_C(0xe438c4d0f2113368),
+ U64_C(0x1fb784bed7bad35f), U64_C(0x9b80fae55ad16efc),
+ U64_C(0x77fe5e6c11b0cd36), U64_C(0xc858095247849129),
+ U64_C(0x08466059b97090a2), U64_C(0x01c10ca6ba0e1253),
+ U64_C(0x6988d6747c040c3a), U64_C(0x6849dad2c60a1e69),
+ U64_C(0x5147ebe67449db73), U64_C(0xc99905f4fd8a837a),
+ U64_C(0x991fe2b433cd4a5a), U64_C(0xf09734c04fc94660),
+ U64_C(0xa28ecbd1e892abe6), U64_C(0xf1563866f5c75433),
+ U64_C(0x4dae7baf70e13ed9), U64_C(0x7ce62ac27bd26b61),
+ U64_C(0x70837a39109ab392), U64_C(0x90988e4b30b3c8ab),
+ U64_C(0xb2020b63877296bf), U64_C(0x156efcb607d6675b) },
+ /* 7 */
+ { U64_C(0xe63f55ce97c331d0), U64_C(0x25b506b0015bba16),
+ U64_C(0xc8706e29e6ad9ba8), U64_C(0x5b43d3775d521f6a),
+ U64_C(0x0bfa3d577035106e), U64_C(0xab95fc172afb0e66),
+ U64_C(0xf64b63979e7a3276), U64_C(0xf58b4562649dad4b),
+ U64_C(0x48f7c3dbae0c83f1), U64_C(0xff31916642f5c8c5),
+ U64_C(0xcbb048dc1c4a0495), U64_C(0x66b8f83cdf622989),
+ U64_C(0x35c130e908e2b9b0), U64_C(0x7c761a61f0b34fa1),
+ U64_C(0x3601161cf205268d), U64_C(0x9e54ccfe2219b7d6),
+ U64_C(0x8b7d90a538940837), U64_C(0x9cd403588ea35d0b),
+ U64_C(0xbc3c6fea9ccc5b5a), U64_C(0xe5ff733b6d24aeed),
+ U64_C(0xceed22de0f7eb8d2), U64_C(0xec8581cab1ab545e),
+ U64_C(0xb96105e88ff8e71d), U64_C(0x8ca03501871a5ead),
+ U64_C(0x76ccce65d6db2a2f), U64_C(0x5883f582a7b58057),
+ U64_C(0x3f7be4ed2e8adc3e), U64_C(0x0fe7be06355cd9c9),
+ U64_C(0xee054e6c1d11be83), U64_C(0x1074365909b903a6),
+ U64_C(0x5dde9f80b4813c10), U64_C(0x4a770c7d02b6692c),
+ U64_C(0x5379c8d5d7809039), U64_C(0xb4067448161ed409),
+ U64_C(0x5f5e5026183bd6cd), U64_C(0xe898029bf4c29df9),
+ U64_C(0x7fb63c940a54d09c), U64_C(0xc5171f897f4ba8bc),
+ U64_C(0xa6f28db7b31d3d72), U64_C(0x2e4f3be7716eaa78),
+ U64_C(0x0d6771a099e63314), U64_C(0x82076254e41bf284),
+ U64_C(0x2f0fd2b42733df98), U64_C(0x5c9e76d3e2dc49f0),
+ U64_C(0x7aeb569619606cdb), U64_C(0x83478b07b2468764),
+ U64_C(0xcfadcb8d5923cd32), U64_C(0x85dac7f05b95a41e),
+ U64_C(0xb5469d1b4043a1e9), U64_C(0xb821ecbbd9a592fd),
+ U64_C(0x1b8e0b0e798c13c8), U64_C(0x62a57b6d9a0be02e),
+ U64_C(0xfcf1b793b81257f8), U64_C(0x9d94ea0bd8fe28eb),
+ U64_C(0x4cea408aeb654a56), U64_C(0x23284a47e888996c),
+ U64_C(0x2d8f1d128b893545), U64_C(0xf4cbac3132c0d8ab),
+ U64_C(0xbd7c86b9ca912eba), U64_C(0x3a268eef3dbe6079),
+ U64_C(0xf0d62f6077a9110c), U64_C(0x2735c916ade150cb),
+ U64_C(0x89fd5f03942ee2ea), U64_C(0x1acee25d2fd16628),
+ U64_C(0x90f39bab41181bff), U64_C(0x430dfe8cde39939f),
+ U64_C(0xf70b8ac4c8274796), U64_C(0x1c53aeaac6024552),
+ U64_C(0x13b410acf35e9c9b), U64_C(0xa532ab4249faa24f),
+ U64_C(0x2b1251e5625a163f), U64_C(0xd7e3e676da4841c7),
+ U64_C(0xa7b264e4e5404892), U64_C(0xda8497d643ae72d3),
+ U64_C(0x861ae105a1723b23), U64_C(0x38a6414991048aa4),
+ U64_C(0x6578dec92585b6b4), U64_C(0x0280cfa6acbaeadd),
+ U64_C(0x88bdb650c273970a), U64_C(0x9333bd5ebbff84c2),
+ U64_C(0x4e6a8f2c47dfa08b), U64_C(0x321c954db76cef2a),
+ U64_C(0x418d312a72837942), U64_C(0xb29b38bfffcdf773),
+ U64_C(0x6c022c38f90a4c07), U64_C(0x5a033a240b0f6a8a),
+ U64_C(0x1f93885f3ce5da6f), U64_C(0xc38a537e96988bc6),
+ U64_C(0x39e6a81ac759ff44), U64_C(0x29929e43cee0fce2),
+ U64_C(0x40cdd87924de0ca2), U64_C(0xe9d8ebc8a29fe819),
+ U64_C(0x0c2798f3cfbb46f4), U64_C(0x55e484223e53b343),
+ U64_C(0x4650948ecd0d2fd8), U64_C(0x20e86cb2126f0651),
+ U64_C(0x6d42c56baf5739e7), U64_C(0xa06fc1405ace1e08),
+ U64_C(0x7babbfc54f3d193b), U64_C(0x424d17df8864e67f),
+ U64_C(0xd8045870ef14980e), U64_C(0xc6d7397c85ac3781),
+ U64_C(0x21a885e1443273b1), U64_C(0x67f8116f893f5c69),
+ U64_C(0x24f5efe35706cff6), U64_C(0xd56329d076f2ab1a),
+ U64_C(0x5e1eb9754e66a32d), U64_C(0x28d2771098bd8902),
+ U64_C(0x8f6013f47dfdc190), U64_C(0x17a993fdb637553c),
+ U64_C(0xe0a219397e1012aa), U64_C(0x786b9930b5da8606),
+ U64_C(0x6e82e39e55b0a6da), U64_C(0x875a0856f72f4ec3),
+ U64_C(0x3741ff4fa458536d), U64_C(0xac4859b3957558fc),
+ U64_C(0x7ef6d5c75c09a57c), U64_C(0xc04a758b6c7f14fb),
+ U64_C(0xf9acdd91ab26ebbf), U64_C(0x7391a467c5ef9668),
+ U64_C(0x335c7c1ee1319aca), U64_C(0xa91533b18641e4bb),
+ U64_C(0xe4bf9a683b79db0d), U64_C(0x8e20faa72ba0b470),
+ U64_C(0x51f907737b3a7ae4), U64_C(0x2268a314bed5ec8c),
+ U64_C(0xd944b123b949edee), U64_C(0x31dcb3b84d8b7017),
+ U64_C(0xd3fe65279f218860), U64_C(0x097af2f1dc8ffab3),
+ U64_C(0x9b09a6fc312d0b91), U64_C(0xcc6ded78a3c4520f),
+ U64_C(0x3481d9ba5ebfcc50), U64_C(0x4f2a667f1182d56b),
+ U64_C(0xdfd9fdd4509ace94), U64_C(0x26752045fbbc252b),
+ U64_C(0xbffc491f662bc467), U64_C(0xdd593272fc202449),
+ U64_C(0x3cbbc218d46d4303), U64_C(0x91b372f817456e1f),
+ U64_C(0x681faf69bc6385a0), U64_C(0xb686bbeebaa43ed4),
+ U64_C(0x1469b5084cd0ca01), U64_C(0x98c98009cbca94ac),
+ U64_C(0x6438379a73d8c354), U64_C(0xc2caba2dc0c5fe26),
+ U64_C(0x3e3b0dbe78d7a9de), U64_C(0x50b9ee202d670f04),
+ U64_C(0x4590b27b37eab0e5), U64_C(0x6025b4cb36b10af3),
+ U64_C(0xfb2c1237079c0162), U64_C(0xa12f28130c936be8),
+ U64_C(0x4b37e52e54eb1ccc), U64_C(0x083a1ba28ad28f53),
+ U64_C(0xc10a9cd83a22611b), U64_C(0x9f1425ad7444c236),
+ U64_C(0x069d4cf7e9d3237a), U64_C(0xedc56899e7f621be),
+ U64_C(0x778c273680865fcf), U64_C(0x309c5aeb1bd605f7),
+ U64_C(0x8de0dc52d1472b4d), U64_C(0xf8ec34c2fd7b9e5f),
+ U64_C(0xea18cd3d58787724), U64_C(0xaad515447ca67b86),
+ U64_C(0x9989695a9d97e14c), U64_C(0x0000000000000000),
+ U64_C(0xf196c63321f464ec), U64_C(0x71116bc169557cb5),
+ U64_C(0xaf887f466f92c7c1), U64_C(0x972e3e0ffe964d65),
+ U64_C(0x190ec4a8d536f915), U64_C(0x95aef1a9522ca7b8),
+ U64_C(0xdc19db21aa7d51a9), U64_C(0x94ee18fa0471d258),
+ U64_C(0x8087adf248a11859), U64_C(0xc457f6da2916dd5c),
+ U64_C(0xfa6cfb6451c17482), U64_C(0xf256e0c6db13fbd1),
+ U64_C(0x6a9f60cf10d96f7d), U64_C(0x4daaa9d9bd383fb6),
+ U64_C(0x03c026f5fae79f3d), U64_C(0xde99148706c7bb74),
+ U64_C(0x2a52b8b6340763df), U64_C(0x6fc20acd03edd33a),
+ U64_C(0xd423c08320afdefa), U64_C(0xbbe1ca4e23420dc0),
+ U64_C(0x966ed75ca8cb3885), U64_C(0xeb58246e0e2502c4),
+ U64_C(0x055d6a021334bc47), U64_C(0xa47242111fa7d7af),
+ U64_C(0xe3623fcc84f78d97), U64_C(0x81c744a11efc6db9),
+ U64_C(0xaec8961539cfb221), U64_C(0xf31609958d4e8e31),
+ U64_C(0x63e5923ecc5695ce), U64_C(0x47107ddd9b505a38),
+ U64_C(0xa3afe7b5a0298135), U64_C(0x792b7063e387f3e6),
+ U64_C(0x0140e953565d75e0), U64_C(0x12f4f9ffa503e97b),
+ U64_C(0x750ce8902c3cb512), U64_C(0xdbc47e8515f30733),
+ U64_C(0x1ed3610c6ab8af8f), U64_C(0x5239218681dde5d9),
+ U64_C(0xe222d69fd2aaf877), U64_C(0xfe71783514a8bd25),
+ U64_C(0xcaf0a18f4a177175), U64_C(0x61655d9860ec7f13),
+ U64_C(0xe77fbc9dc19e4430), U64_C(0x2ccff441ddd440a5),
+ U64_C(0x16e97aaee06a20dc), U64_C(0xa855dae2d01c915b),
+ U64_C(0x1d1347f9905f30b2), U64_C(0xb7c652bdecf94b34),
+ U64_C(0xd03e43d265c6175d), U64_C(0xfdb15ec0ee4f2218),
+ U64_C(0x57644b8492e9599e), U64_C(0x07dda5a4bf8e569a),
+ U64_C(0x54a46d71680ec6a3), U64_C(0x5624a2d7c4b42c7e),
+ U64_C(0xbebca04c3076b187), U64_C(0x7d36f332a6ee3a41),
+ U64_C(0x3b6667bc6be31599), U64_C(0x695f463aea3ef040),
+ U64_C(0xad08b0e0c3282d1c), U64_C(0xb15b1e4a052a684e),
+ U64_C(0x44d05b2861b7c505), U64_C(0x15295c5b1a8dbfe1),
+ U64_C(0x744c01c37a61c0f2), U64_C(0x59c31cd1f1e8f5b7),
+ U64_C(0xef45a73f4b4ccb63), U64_C(0x6bdf899c46841a9d),
+ U64_C(0x3dfb2b4b823036e3), U64_C(0xa2ef0ee6f674f4d5),
+ U64_C(0x184e2dfb836b8cf5), U64_C(0x1134df0a5fe47646),
+ U64_C(0xbaa1231d751f7820), U64_C(0xd17eaa81339b62bd),
+ U64_C(0xb01bf71953771dae), U64_C(0x849a2ea30dc8d1fe),
+ U64_C(0x705182923f080955), U64_C(0x0ea757556301ac29),
+ U64_C(0x041d83514569c9a7), U64_C(0x0abad4042668658e),
+ U64_C(0x49b72a88f851f611), U64_C(0x8a3d79f66ec97dd7),
+ U64_C(0xcd2d042bf59927ef), U64_C(0xc930877ab0f0ee48),
+ U64_C(0x9273540deda2f122), U64_C(0xc797d02fd3f14261),
+ U64_C(0xe1e2f06a284d674a), U64_C(0xd2be8c74c97cfd80),
+ U64_C(0x9a494faf67707e71), U64_C(0xb3dbd1eca9908293),
+ U64_C(0x72d14d3493b2e388), U64_C(0xd6a30f258c153427) },
+};
+
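+/* The twelve iteration (round) constants C_1 .. C_12 of GOST R 34.11-2012
+ * (Streebog), each stored as eight 64-bit words.  They are XORed into the
+ * round-key schedule of the compression function below. */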
+static const u64 C16[12][8] =
+{
+ { U64_C(0xdd806559f2a64507), U64_C(0x05767436cc744d23),
+ U64_C(0xa2422a08a460d315), U64_C(0x4b7ce09192676901),
+ U64_C(0x714eb88d7585c4fc), U64_C(0x2f6a76432e45d016),
+ U64_C(0xebcb2f81c0657c1f), U64_C(0xb1085bda1ecadae9) },
+ { U64_C(0xe679047021b19bb7), U64_C(0x55dda21bd7cbcd56),
+ U64_C(0x5cb561c2db0aa7ca), U64_C(0x9ab5176b12d69958),
+ U64_C(0x61d55e0f16b50131), U64_C(0xf3feea720a232b98),
+ U64_C(0x4fe39d460f70b5d7), U64_C(0x6fa3b58aa99d2f1a) },
+ { U64_C(0x991e96f50aba0ab2), U64_C(0xc2b6f443867adb31),
+ U64_C(0xc1c93a376062db09), U64_C(0xd3e20fe490359eb1),
+ U64_C(0xf2ea7514b1297b7b), U64_C(0x06f15e5f529c1f8b),
+ U64_C(0x0a39fc286a3d8435), U64_C(0xf574dcac2bce2fc7) },
+ { U64_C(0x220cbebc84e3d12e), U64_C(0x3453eaa193e837f1),
+ U64_C(0xd8b71333935203be), U64_C(0xa9d72c82ed03d675),
+ U64_C(0x9d721cad685e353f), U64_C(0x488e857e335c3c7d),
+ U64_C(0xf948e1a05d71e4dd), U64_C(0xef1fdfb3e81566d2) },
+ { U64_C(0x601758fd7c6cfe57), U64_C(0x7a56a27ea9ea63f5),
+ U64_C(0xdfff00b723271a16), U64_C(0xbfcd1747253af5a3),
+ U64_C(0x359e35d7800fffbd), U64_C(0x7f151c1f1686104a),
+ U64_C(0x9a3f410c6ca92363), U64_C(0x4bea6bacad474799) },
+ { U64_C(0xfa68407a46647d6e), U64_C(0xbf71c57236904f35),
+ U64_C(0x0af21f66c2bec6b6), U64_C(0xcffaa6b71c9ab7b4),
+ U64_C(0x187f9ab49af08ec6), U64_C(0x2d66c4f95142a46c),
+ U64_C(0x6fa4c33b7a3039c0), U64_C(0xae4faeae1d3ad3d9) },
+ { U64_C(0x8886564d3a14d493), U64_C(0x3517454ca23c4af3),
+ U64_C(0x06476983284a0504), U64_C(0x0992abc52d822c37),
+ U64_C(0xd3473e33197a93c9), U64_C(0x399ec6c7e6bf87c9),
+ U64_C(0x51ac86febf240954), U64_C(0xf4c70e16eeaac5ec) },
+ { U64_C(0xa47f0dd4bf02e71e), U64_C(0x36acc2355951a8d9),
+ U64_C(0x69d18d2bd1a5c42f), U64_C(0xf4892bcb929b0690),
+ U64_C(0x89b4443b4ddbc49a), U64_C(0x4eb7f8719c36de1e),
+ U64_C(0x03e7aa020c6e4141), U64_C(0x9b1f5b424d93c9a7) },
+ { U64_C(0x7261445183235adb), U64_C(0x0e38dc92cb1f2a60),
+ U64_C(0x7b2b8a9aa6079c54), U64_C(0x800a440bdbb2ceb1),
+ U64_C(0x3cd955b7e00d0984), U64_C(0x3a7d3a1b25894224),
+ U64_C(0x944c9ad8ec165fde), U64_C(0x378f5a541631229b) },
+ { U64_C(0x74b4c7fb98459ced), U64_C(0x3698fad1153bb6c3),
+ U64_C(0x7a1e6c303b7652f4), U64_C(0x9fe76702af69334b),
+ U64_C(0x1fffe18a1b336103), U64_C(0x8941e71cff8a78db),
+ U64_C(0x382ae548b2e4f3f3), U64_C(0xabbedea680056f52) },
+ { U64_C(0x6bcaa4cd81f32d1b), U64_C(0xdea2594ac06fd85d),
+ U64_C(0xefbacd1d7d476e98), U64_C(0x8a1d71efea48b9ca),
+ U64_C(0x2001802114846679), U64_C(0xd8fa6bbbebab0761),
+ U64_C(0x3002c6cd635afe94), U64_C(0x7bcd9ed0efc889fb) },
+ { U64_C(0x48bc924af11bd720), U64_C(0xfaf417d5d9b21b99),
+ U64_C(0xe71da4aa88e12852), U64_C(0x5d80ef9d1891cc86),
+ U64_C(0xf82012d430219f9b), U64_C(0xcda43c32bcdf1d77),
+ U64_C(0xd21380b00449b17a), U64_C(0x378ee767f11631ba) },
+};
+
+
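+/* Compute output word I of the combined S (substitution), P (permutation)
+ * and L (linear) step: byte I of each of the eight input words selects an
+ * entry in the corresponding precomputed table, and the eight entries are
+ * XORed together. */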
+#define strido(out, temp, i) do { \
+ u64 t; \
+ t = stribog_table[0][(temp[0] >> (i * 8)) & 0xff]; \
+ t ^= stribog_table[1][(temp[1] >> (i * 8)) & 0xff]; \
+ t ^= stribog_table[2][(temp[2] >> (i * 8)) & 0xff]; \
+ t ^= stribog_table[3][(temp[3] >> (i * 8)) & 0xff]; \
+ t ^= stribog_table[4][(temp[4] >> (i * 8)) & 0xff]; \
+ t ^= stribog_table[5][(temp[5] >> (i * 8)) & 0xff]; \
+ t ^= stribog_table[6][(temp[6] >> (i * 8)) & 0xff]; \
+ t ^= stribog_table[7][(temp[7] >> (i * 8)) & 0xff]; \
+ out[i] = t; } while(0)
+
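+/* X step followed by the table-driven L o P o S transform: XOR the two
+ * 512-bit inputs and run the result through the lookup tables above. */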
+static void LPSX (u64 *out, const u64 *a, const u64 *b)
+{
+ u64 temp[8];
+ temp[0] = a[0] ^ b[0];
+ temp[1] = a[1] ^ b[1];
+ temp[2] = a[2] ^ b[2];
+ temp[3] = a[3] ^ b[3];
+ temp[4] = a[4] ^ b[4];
+ temp[5] = a[5] ^ b[5];
+ temp[6] = a[6] ^ b[6];
+ temp[7] = a[7] ^ b[7];
+ strido (out, temp, 0);
+ strido (out, temp, 1);
+ strido (out, temp, 2);
+ strido (out, temp, 3);
+ strido (out, temp, 4);
+ strido (out, temp, 5);
+ strido (out, temp, 6);
+ strido (out, temp, 7);
+}
+
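+/* Compression function g_N(h, m): derive the initial round key from h and
+ * the counter N, run twelve LPSX rounds over the message block, and fold
+ * the result back into the chaining value together with m
+ * (Miyaguchi-Preneel style construction). */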
+static inline void g (u64 *h, u64 *m, u64 *N)
+{
+ u64 K[8];
+ u64 T[8];
+ int i;
+
+ LPSX (K, h, N);
+
+ LPSX (T, K, m);
+ LPSX (K, K, C16[0]);
+ for (i = 1; i < 12; i++)
+ {
+ LPSX (T, K, T);
+ LPSX (K, K, C16[i]);
+ }
+
+ h[0] ^= T[0] ^ K[0] ^ m[0];
+ h[1] ^= T[1] ^ K[1] ^ m[1];
+ h[2] ^= T[2] ^ K[2] ^ m[2];
+ h[3] ^= T[3] ^ K[3] ^ m[3];
+ h[4] ^= T[4] ^ K[4] ^ m[4];
+ h[5] ^= T[5] ^ K[5] ^ m[5];
+ h[6] ^= T[6] ^ K[6] ^ m[6];
+ h[7] ^= T[7] ^ K[7] ^ m[7];
+}
+
+
+static unsigned int
+transform (void *context, const unsigned char *inbuf_arg, size_t datalen);
+
+
+static void
+stribog_init_512 (void *context, unsigned int flags)
+{
+ STRIBOG_CONTEXT *hd = context;
+
+ (void)flags;
+
+ memset (hd, 0, sizeof (*hd));
+
+ hd->bctx.blocksize_shift = _gcry_ctz(64);
+ hd->bctx.bwrite = transform;
+}
+
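+/* The 256-bit variant uses the same machinery as the 512-bit one; the only
+ * difference at init time is the IV, where every byte of the initial state
+ * is 0x01 instead of 0x00. */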
+static void
+stribog_init_256 (void *context, unsigned int flags)
+{
+ STRIBOG_CONTEXT *hd = context;
+
+ stribog_init_512 (context, flags);
+ memset (hd->h, 1, 64);
+}
+
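+/* Process one 64-byte block: mix it into the state via g, add COUNT bits to
+ * the 512-bit message length counter N, and add the block into the 512-bit
+ * checksum Sigma, propagating carries by hand across the eight words. */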
+static void
+transform_bits (STRIBOG_CONTEXT *hd, const unsigned char *data, unsigned count)
+{
+ u64 M[8];
+ u64 l, cf;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ M[i] = buf_get_le64(data + i * 8);
+
+ g (hd->h, M, hd->N);
+ l = hd->N[0];
+ hd->N[0] += count;
+ if (hd->N[0] < l)
+ { /* overflow */
+ for (i = 1; i < 8; i++)
+ {
+ hd->N[i]++;
+ if (hd->N[i] != 0)
+ break;
+ }
+ }
+
+ hd->Sigma[0] += M[0];
+ cf = 0;
+ for (i = 1; i < 8; i++)
+ {
+ if (hd->Sigma[i-1] != M[i-1])
+ cf = (hd->Sigma[i-1] < M[i-1]);
+ hd->Sigma[i] += M[i] + cf;
+ }
+}
+
+static unsigned int
+transform_blk (void *context, const unsigned char *inbuf_arg)
+{
+ STRIBOG_CONTEXT *hd = context;
+
+ transform_bits (hd, inbuf_arg, 64 * 8);
+
+ return /* burn_stack */ 768;
+}
+
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
+{
+ unsigned int burn;
+
+ do
+ {
+ burn = transform_blk (c, data);
+ data += 64;
+ }
+ while (--nblks);
+
+ return burn;
+}
+
+/*
+   The routine finally terminates the computation and returns the
+   digest.  The handle is prepared for a new cycle, but adding bytes
+   to the handle will destroy the returned buffer.  Returns: 64
+   bytes with the message digest; the Stribog-256 variant uses the
+   last 32 of them.  */
+static void
+stribog_final (void *context)
+{
+ STRIBOG_CONTEXT *hd = context;
+ u64 Z[8] = {};
+ int i;
+
+ /* PAD. It does not count towards message length */
+ i = hd->bctx.count;
+ /* After flush we have at least one byte free. */
+ hd->bctx.buf[i++] = 1;
+ if (i < 64)
+ memset (&hd->bctx.buf[i], 0, 64 - i);
+ i = 64;
+ transform_bits (hd, hd->bctx.buf, hd->bctx.count * 8);
+
+ g (hd->h, hd->N, Z);
+ g (hd->h, hd->Sigma, Z);
+
+ for (i = 0; i < 8; i++)
+ hd->h[i] = le_bswap64(hd->h[i]);
+
+ hd->bctx.count = 0;
+
+ _gcry_burn_stack (768);
+}
+
+static byte *
+stribog_read_512 (void *context)
+{
+ STRIBOG_CONTEXT *hd = context;
+
+ return hd->result;
+}
+
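+/* The 256-bit digest is taken from the second half of the 64-byte state
+ * buffer. */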
+static byte *
+stribog_read_256 (void *context)
+{
+ STRIBOG_CONTEXT *hd = context;
+
+ return hd->result + 32;
+}
+
+static gcry_md_oid_spec_t oid_spec_stribog256[] =
+ {
+ /* id-tc26-signwithdigest-gost3410-12-256 */
+ { "1.2.643.7.1.1.3.2" },
+ /* id-tc26-gost3411-12-256 */
+ { "1.2.643.7.1.1.2.2" },
+ { NULL },
+ };
+
+static gcry_md_oid_spec_t oid_spec_stribog512[] =
+ {
+ /* id-tc26-signwithdigest-gost3410-12-512 */
+ { "1.2.643.7.1.1.3.3" },
+ /* id-tc26-gost3411-12-512 */
+ { "1.2.643.7.1.1.2.3" },
+ { NULL },
+ };
+
+gcry_md_spec_t _gcry_digest_spec_stribog_256 =
+ {
+ GCRY_MD_STRIBOG256, {0, 0},
+ "STRIBOG256", NULL, 0, oid_spec_stribog256, 32,
+ stribog_init_256, _gcry_md_block_write, stribog_final, stribog_read_256,
+ NULL, NULL, NULL,
+ sizeof (STRIBOG_CONTEXT)
+ };
+
+gcry_md_spec_t _gcry_digest_spec_stribog_512 =
+ {
+ GCRY_MD_STRIBOG512, {0, 0},
+ "STRIBOG512", NULL, 0, oid_spec_stribog512, 64,
+ stribog_init_512, _gcry_md_block_write, stribog_final, stribog_read_512,
+ NULL, NULL, NULL,
+ sizeof (STRIBOG_CONTEXT)
+ };
diff --git a/comm/third_party/libgcrypt/cipher/tiger.c b/comm/third_party/libgcrypt/cipher/tiger.c
new file mode 100644
index 0000000000..4039b22b1c
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/tiger.c
@@ -0,0 +1,860 @@
+/* tiger.c - The TIGER hash function
+ * Copyright (C) 1998, 2001, 2002, 2003, 2010 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+/* See http://www.cs.technion.ac.il/~biham/Reports/Tiger/ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "hash-common.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+
+typedef struct
+{
+ gcry_md_block_ctx_t bctx;
+ u64 a, b, c;
+ int variant; /* 0 = old code, 1 = fixed code, 2 = TIGER2. */
+} TIGER_CONTEXT;
+
+
+/*********************************
+ * Okay, okay, this is not the fastest code - improvements are welcome.
+ *
+ */
+
+/* Some test vectors:
+ * "" 24F0130C63AC9332 16166E76B1BB925F F373DE2D49584E7A
+ * "abc" F258C1E88414AB2A 527AB541FFC5B8BF 935F7B951C132951
+ * "Tiger" 9F00F599072300DD 276ABB38C8EB6DEC 37790C116F9D2BDF
+ * "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-"
+ * 87FB2A9083851CF7 470D2CF810E6DF9E B586445034A5A386
+ * "ABCDEFGHIJKLMNOPQRSTUVWXYZ=abcdefghijklmnopqrstuvwxyz+0123456789"
+ * 467DB80863EBCE48 8DF1CD1261655DE9 57896565975F9197
+ * "Tiger - A Fast New Hash Function, by Ross Anderson and Eli Biham"
+ * 0C410A042968868A 1671DA5A3FD29A72 5EC1E457D3CDB303
+ * "Tiger - A Fast New Hash Function, by Ross Anderson and Eli Biham, proc"
+ * "eedings of Fast Software Encryption 3, Cambridge."
+ * EBF591D5AFA655CE 7F22894FF87F54AC 89C811B6B0DA3193
+ * "Tiger - A Fast New Hash Function, by Ross Anderson and Eli Biham, proc"
+ * "eedings of Fast Software Encryption 3, Cambridge, 1996."
+ * 3D9AEB03D1BD1A63 57B2774DFD6D5B24 DD68151D503974FC
+ * "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-ABCDEF"
+ * "GHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-"
+ * 00B83EB4E53440C5 76AC6AAEE0A74858 25FD15E70A59FFE4
+ */
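+
+/* For a quick check against the vectors above, a digest can be computed
+ * through the generic libgcrypt API.  A minimal sketch (assuming an
+ * initialized libgcrypt; GCRY_MD_TIGER1 selects the fixed variant and
+ * yields a 24-byte digest):
+ *
+ *   unsigned char digest[24];
+ *   gcry_md_hash_buffer (GCRY_MD_TIGER1, digest, "abc", 3);
+ *
+ * Note that the vectors above list the three 64-bit state words a, b, c;
+ * the byte order of the returned digest differs between the old TIGER
+ * variant and TIGER1/TIGER2.
+ */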
+
+static u64 sbox1[256] = {
+ U64_C(0x02aab17cf7e90c5e) /* 0 */, U64_C(0xac424b03e243a8ec) /* 1 */,
+ U64_C(0x72cd5be30dd5fcd3) /* 2 */, U64_C(0x6d019b93f6f97f3a) /* 3 */,
+ U64_C(0xcd9978ffd21f9193) /* 4 */, U64_C(0x7573a1c9708029e2) /* 5 */,
+ U64_C(0xb164326b922a83c3) /* 6 */, U64_C(0x46883eee04915870) /* 7 */,
+ U64_C(0xeaace3057103ece6) /* 8 */, U64_C(0xc54169b808a3535c) /* 9 */,
+ U64_C(0x4ce754918ddec47c) /* 10 */, U64_C(0x0aa2f4dfdc0df40c) /* 11 */,
+ U64_C(0x10b76f18a74dbefa) /* 12 */, U64_C(0xc6ccb6235ad1ab6a) /* 13 */,
+ U64_C(0x13726121572fe2ff) /* 14 */, U64_C(0x1a488c6f199d921e) /* 15 */,
+ U64_C(0x4bc9f9f4da0007ca) /* 16 */, U64_C(0x26f5e6f6e85241c7) /* 17 */,
+ U64_C(0x859079dbea5947b6) /* 18 */, U64_C(0x4f1885c5c99e8c92) /* 19 */,
+ U64_C(0xd78e761ea96f864b) /* 20 */, U64_C(0x8e36428c52b5c17d) /* 21 */,
+ U64_C(0x69cf6827373063c1) /* 22 */, U64_C(0xb607c93d9bb4c56e) /* 23 */,
+ U64_C(0x7d820e760e76b5ea) /* 24 */, U64_C(0x645c9cc6f07fdc42) /* 25 */,
+ U64_C(0xbf38a078243342e0) /* 26 */, U64_C(0x5f6b343c9d2e7d04) /* 27 */,
+ U64_C(0xf2c28aeb600b0ec6) /* 28 */, U64_C(0x6c0ed85f7254bcac) /* 29 */,
+ U64_C(0x71592281a4db4fe5) /* 30 */, U64_C(0x1967fa69ce0fed9f) /* 31 */,
+ U64_C(0xfd5293f8b96545db) /* 32 */, U64_C(0xc879e9d7f2a7600b) /* 33 */,
+ U64_C(0x860248920193194e) /* 34 */, U64_C(0xa4f9533b2d9cc0b3) /* 35 */,
+ U64_C(0x9053836c15957613) /* 36 */, U64_C(0xdb6dcf8afc357bf1) /* 37 */,
+ U64_C(0x18beea7a7a370f57) /* 38 */, U64_C(0x037117ca50b99066) /* 39 */,
+ U64_C(0x6ab30a9774424a35) /* 40 */, U64_C(0xf4e92f02e325249b) /* 41 */,
+ U64_C(0x7739db07061ccae1) /* 42 */, U64_C(0xd8f3b49ceca42a05) /* 43 */,
+ U64_C(0xbd56be3f51382f73) /* 44 */, U64_C(0x45faed5843b0bb28) /* 45 */,
+ U64_C(0x1c813d5c11bf1f83) /* 46 */, U64_C(0x8af0e4b6d75fa169) /* 47 */,
+ U64_C(0x33ee18a487ad9999) /* 48 */, U64_C(0x3c26e8eab1c94410) /* 49 */,
+ U64_C(0xb510102bc0a822f9) /* 50 */, U64_C(0x141eef310ce6123b) /* 51 */,
+ U64_C(0xfc65b90059ddb154) /* 52 */, U64_C(0xe0158640c5e0e607) /* 53 */,
+ U64_C(0x884e079826c3a3cf) /* 54 */, U64_C(0x930d0d9523c535fd) /* 55 */,
+ U64_C(0x35638d754e9a2b00) /* 56 */, U64_C(0x4085fccf40469dd5) /* 57 */,
+ U64_C(0xc4b17ad28be23a4c) /* 58 */, U64_C(0xcab2f0fc6a3e6a2e) /* 59 */,
+ U64_C(0x2860971a6b943fcd) /* 60 */, U64_C(0x3dde6ee212e30446) /* 61 */,
+ U64_C(0x6222f32ae01765ae) /* 62 */, U64_C(0x5d550bb5478308fe) /* 63 */,
+ U64_C(0xa9efa98da0eda22a) /* 64 */, U64_C(0xc351a71686c40da7) /* 65 */,
+ U64_C(0x1105586d9c867c84) /* 66 */, U64_C(0xdcffee85fda22853) /* 67 */,
+ U64_C(0xccfbd0262c5eef76) /* 68 */, U64_C(0xbaf294cb8990d201) /* 69 */,
+ U64_C(0xe69464f52afad975) /* 70 */, U64_C(0x94b013afdf133e14) /* 71 */,
+ U64_C(0x06a7d1a32823c958) /* 72 */, U64_C(0x6f95fe5130f61119) /* 73 */,
+ U64_C(0xd92ab34e462c06c0) /* 74 */, U64_C(0xed7bde33887c71d2) /* 75 */,
+ U64_C(0x79746d6e6518393e) /* 76 */, U64_C(0x5ba419385d713329) /* 77 */,
+ U64_C(0x7c1ba6b948a97564) /* 78 */, U64_C(0x31987c197bfdac67) /* 79 */,
+ U64_C(0xde6c23c44b053d02) /* 80 */, U64_C(0x581c49fed002d64d) /* 81 */,
+ U64_C(0xdd474d6338261571) /* 82 */, U64_C(0xaa4546c3e473d062) /* 83 */,
+ U64_C(0x928fce349455f860) /* 84 */, U64_C(0x48161bbacaab94d9) /* 85 */,
+ U64_C(0x63912430770e6f68) /* 86 */, U64_C(0x6ec8a5e602c6641c) /* 87 */,
+ U64_C(0x87282515337ddd2b) /* 88 */, U64_C(0x2cda6b42034b701b) /* 89 */,
+ U64_C(0xb03d37c181cb096d) /* 90 */, U64_C(0xe108438266c71c6f) /* 91 */,
+ U64_C(0x2b3180c7eb51b255) /* 92 */, U64_C(0xdf92b82f96c08bbc) /* 93 */,
+ U64_C(0x5c68c8c0a632f3ba) /* 94 */, U64_C(0x5504cc861c3d0556) /* 95 */,
+ U64_C(0xabbfa4e55fb26b8f) /* 96 */, U64_C(0x41848b0ab3baceb4) /* 97 */,
+ U64_C(0xb334a273aa445d32) /* 98 */, U64_C(0xbca696f0a85ad881) /* 99 */,
+ U64_C(0x24f6ec65b528d56c) /* 100 */, U64_C(0x0ce1512e90f4524a) /* 101 */,
+ U64_C(0x4e9dd79d5506d35a) /* 102 */, U64_C(0x258905fac6ce9779) /* 103 */,
+ U64_C(0x2019295b3e109b33) /* 104 */, U64_C(0xf8a9478b73a054cc) /* 105 */,
+ U64_C(0x2924f2f934417eb0) /* 106 */, U64_C(0x3993357d536d1bc4) /* 107 */,
+ U64_C(0x38a81ac21db6ff8b) /* 108 */, U64_C(0x47c4fbf17d6016bf) /* 109 */,
+ U64_C(0x1e0faadd7667e3f5) /* 110 */, U64_C(0x7abcff62938beb96) /* 111 */,
+ U64_C(0xa78dad948fc179c9) /* 112 */, U64_C(0x8f1f98b72911e50d) /* 113 */,
+ U64_C(0x61e48eae27121a91) /* 114 */, U64_C(0x4d62f7ad31859808) /* 115 */,
+ U64_C(0xeceba345ef5ceaeb) /* 116 */, U64_C(0xf5ceb25ebc9684ce) /* 117 */,
+ U64_C(0xf633e20cb7f76221) /* 118 */, U64_C(0xa32cdf06ab8293e4) /* 119 */,
+ U64_C(0x985a202ca5ee2ca4) /* 120 */, U64_C(0xcf0b8447cc8a8fb1) /* 121 */,
+ U64_C(0x9f765244979859a3) /* 122 */, U64_C(0xa8d516b1a1240017) /* 123 */,
+ U64_C(0x0bd7ba3ebb5dc726) /* 124 */, U64_C(0xe54bca55b86adb39) /* 125 */,
+ U64_C(0x1d7a3afd6c478063) /* 126 */, U64_C(0x519ec608e7669edd) /* 127 */,
+ U64_C(0x0e5715a2d149aa23) /* 128 */, U64_C(0x177d4571848ff194) /* 129 */,
+ U64_C(0xeeb55f3241014c22) /* 130 */, U64_C(0x0f5e5ca13a6e2ec2) /* 131 */,
+ U64_C(0x8029927b75f5c361) /* 132 */, U64_C(0xad139fabc3d6e436) /* 133 */,
+ U64_C(0x0d5df1a94ccf402f) /* 134 */, U64_C(0x3e8bd948bea5dfc8) /* 135 */,
+ U64_C(0xa5a0d357bd3ff77e) /* 136 */, U64_C(0xa2d12e251f74f645) /* 137 */,
+ U64_C(0x66fd9e525e81a082) /* 138 */, U64_C(0x2e0c90ce7f687a49) /* 139 */,
+ U64_C(0xc2e8bcbeba973bc5) /* 140 */, U64_C(0x000001bce509745f) /* 141 */,
+ U64_C(0x423777bbe6dab3d6) /* 142 */, U64_C(0xd1661c7eaef06eb5) /* 143 */,
+ U64_C(0xa1781f354daacfd8) /* 144 */, U64_C(0x2d11284a2b16affc) /* 145 */,
+ U64_C(0xf1fc4f67fa891d1f) /* 146 */, U64_C(0x73ecc25dcb920ada) /* 147 */,
+ U64_C(0xae610c22c2a12651) /* 148 */, U64_C(0x96e0a810d356b78a) /* 149 */,
+ U64_C(0x5a9a381f2fe7870f) /* 150 */, U64_C(0xd5ad62ede94e5530) /* 151 */,
+ U64_C(0xd225e5e8368d1427) /* 152 */, U64_C(0x65977b70c7af4631) /* 153 */,
+ U64_C(0x99f889b2de39d74f) /* 154 */, U64_C(0x233f30bf54e1d143) /* 155 */,
+ U64_C(0x9a9675d3d9a63c97) /* 156 */, U64_C(0x5470554ff334f9a8) /* 157 */,
+ U64_C(0x166acb744a4f5688) /* 158 */, U64_C(0x70c74caab2e4aead) /* 159 */,
+ U64_C(0xf0d091646f294d12) /* 160 */, U64_C(0x57b82a89684031d1) /* 161 */,
+ U64_C(0xefd95a5a61be0b6b) /* 162 */, U64_C(0x2fbd12e969f2f29a) /* 163 */,
+ U64_C(0x9bd37013feff9fe8) /* 164 */, U64_C(0x3f9b0404d6085a06) /* 165 */,
+ U64_C(0x4940c1f3166cfe15) /* 166 */, U64_C(0x09542c4dcdf3defb) /* 167 */,
+ U64_C(0xb4c5218385cd5ce3) /* 168 */, U64_C(0xc935b7dc4462a641) /* 169 */,
+ U64_C(0x3417f8a68ed3b63f) /* 170 */, U64_C(0xb80959295b215b40) /* 171 */,
+ U64_C(0xf99cdaef3b8c8572) /* 172 */, U64_C(0x018c0614f8fcb95d) /* 173 */,
+ U64_C(0x1b14accd1a3acdf3) /* 174 */, U64_C(0x84d471f200bb732d) /* 175 */,
+ U64_C(0xc1a3110e95e8da16) /* 176 */, U64_C(0x430a7220bf1a82b8) /* 177 */,
+ U64_C(0xb77e090d39df210e) /* 178 */, U64_C(0x5ef4bd9f3cd05e9d) /* 179 */,
+ U64_C(0x9d4ff6da7e57a444) /* 180 */, U64_C(0xda1d60e183d4a5f8) /* 181 */,
+ U64_C(0xb287c38417998e47) /* 182 */, U64_C(0xfe3edc121bb31886) /* 183 */,
+ U64_C(0xc7fe3ccc980ccbef) /* 184 */, U64_C(0xe46fb590189bfd03) /* 185 */,
+ U64_C(0x3732fd469a4c57dc) /* 186 */, U64_C(0x7ef700a07cf1ad65) /* 187 */,
+ U64_C(0x59c64468a31d8859) /* 188 */, U64_C(0x762fb0b4d45b61f6) /* 189 */,
+ U64_C(0x155baed099047718) /* 190 */, U64_C(0x68755e4c3d50baa6) /* 191 */,
+ U64_C(0xe9214e7f22d8b4df) /* 192 */, U64_C(0x2addbf532eac95f4) /* 193 */,
+ U64_C(0x32ae3909b4bd0109) /* 194 */, U64_C(0x834df537b08e3450) /* 195 */,
+ U64_C(0xfa209da84220728d) /* 196 */, U64_C(0x9e691d9b9efe23f7) /* 197 */,
+ U64_C(0x0446d288c4ae8d7f) /* 198 */, U64_C(0x7b4cc524e169785b) /* 199 */,
+ U64_C(0x21d87f0135ca1385) /* 200 */, U64_C(0xcebb400f137b8aa5) /* 201 */,
+ U64_C(0x272e2b66580796be) /* 202 */, U64_C(0x3612264125c2b0de) /* 203 */,
+ U64_C(0x057702bdad1efbb2) /* 204 */, U64_C(0xd4babb8eacf84be9) /* 205 */,
+ U64_C(0x91583139641bc67b) /* 206 */, U64_C(0x8bdc2de08036e024) /* 207 */,
+ U64_C(0x603c8156f49f68ed) /* 208 */, U64_C(0xf7d236f7dbef5111) /* 209 */,
+ U64_C(0x9727c4598ad21e80) /* 210 */, U64_C(0xa08a0896670a5fd7) /* 211 */,
+ U64_C(0xcb4a8f4309eba9cb) /* 212 */, U64_C(0x81af564b0f7036a1) /* 213 */,
+ U64_C(0xc0b99aa778199abd) /* 214 */, U64_C(0x959f1ec83fc8e952) /* 215 */,
+ U64_C(0x8c505077794a81b9) /* 216 */, U64_C(0x3acaaf8f056338f0) /* 217 */,
+ U64_C(0x07b43f50627a6778) /* 218 */, U64_C(0x4a44ab49f5eccc77) /* 219 */,
+ U64_C(0x3bc3d6e4b679ee98) /* 220 */, U64_C(0x9cc0d4d1cf14108c) /* 221 */,
+ U64_C(0x4406c00b206bc8a0) /* 222 */, U64_C(0x82a18854c8d72d89) /* 223 */,
+ U64_C(0x67e366b35c3c432c) /* 224 */, U64_C(0xb923dd61102b37f2) /* 225 */,
+ U64_C(0x56ab2779d884271d) /* 226 */, U64_C(0xbe83e1b0ff1525af) /* 227 */,
+ U64_C(0xfb7c65d4217e49a9) /* 228 */, U64_C(0x6bdbe0e76d48e7d4) /* 229 */,
+ U64_C(0x08df828745d9179e) /* 230 */, U64_C(0x22ea6a9add53bd34) /* 231 */,
+ U64_C(0xe36e141c5622200a) /* 232 */, U64_C(0x7f805d1b8cb750ee) /* 233 */,
+ U64_C(0xafe5c7a59f58e837) /* 234 */, U64_C(0xe27f996a4fb1c23c) /* 235 */,
+ U64_C(0xd3867dfb0775f0d0) /* 236 */, U64_C(0xd0e673de6e88891a) /* 237 */,
+ U64_C(0x123aeb9eafb86c25) /* 238 */, U64_C(0x30f1d5d5c145b895) /* 239 */,
+ U64_C(0xbb434a2dee7269e7) /* 240 */, U64_C(0x78cb67ecf931fa38) /* 241 */,
+ U64_C(0xf33b0372323bbf9c) /* 242 */, U64_C(0x52d66336fb279c74) /* 243 */,
+ U64_C(0x505f33ac0afb4eaa) /* 244 */, U64_C(0xe8a5cd99a2cce187) /* 245 */,
+ U64_C(0x534974801e2d30bb) /* 246 */, U64_C(0x8d2d5711d5876d90) /* 247 */,
+ U64_C(0x1f1a412891bc038e) /* 248 */, U64_C(0xd6e2e71d82e56648) /* 249 */,
+ U64_C(0x74036c3a497732b7) /* 250 */, U64_C(0x89b67ed96361f5ab) /* 251 */,
+ U64_C(0xffed95d8f1ea02a2) /* 252 */, U64_C(0xe72b3bd61464d43d) /* 253 */,
+ U64_C(0xa6300f170bdc4820) /* 254 */, U64_C(0xebc18760ed78a77a) /* 255 */
+};
+static u64 sbox2[256] = {
+ U64_C(0xe6a6be5a05a12138) /* 256 */, U64_C(0xb5a122a5b4f87c98) /* 257 */,
+ U64_C(0x563c6089140b6990) /* 258 */, U64_C(0x4c46cb2e391f5dd5) /* 259 */,
+ U64_C(0xd932addbc9b79434) /* 260 */, U64_C(0x08ea70e42015aff5) /* 261 */,
+ U64_C(0xd765a6673e478cf1) /* 262 */, U64_C(0xc4fb757eab278d99) /* 263 */,
+ U64_C(0xdf11c6862d6e0692) /* 264 */, U64_C(0xddeb84f10d7f3b16) /* 265 */,
+ U64_C(0x6f2ef604a665ea04) /* 266 */, U64_C(0x4a8e0f0ff0e0dfb3) /* 267 */,
+ U64_C(0xa5edeef83dbcba51) /* 268 */, U64_C(0xfc4f0a2a0ea4371e) /* 269 */,
+ U64_C(0xe83e1da85cb38429) /* 270 */, U64_C(0xdc8ff882ba1b1ce2) /* 271 */,
+ U64_C(0xcd45505e8353e80d) /* 272 */, U64_C(0x18d19a00d4db0717) /* 273 */,
+ U64_C(0x34a0cfeda5f38101) /* 274 */, U64_C(0x0be77e518887caf2) /* 275 */,
+ U64_C(0x1e341438b3c45136) /* 276 */, U64_C(0xe05797f49089ccf9) /* 277 */,
+ U64_C(0xffd23f9df2591d14) /* 278 */, U64_C(0x543dda228595c5cd) /* 279 */,
+ U64_C(0x661f81fd99052a33) /* 280 */, U64_C(0x8736e641db0f7b76) /* 281 */,
+ U64_C(0x15227725418e5307) /* 282 */, U64_C(0xe25f7f46162eb2fa) /* 283 */,
+ U64_C(0x48a8b2126c13d9fe) /* 284 */, U64_C(0xafdc541792e76eea) /* 285 */,
+ U64_C(0x03d912bfc6d1898f) /* 286 */, U64_C(0x31b1aafa1b83f51b) /* 287 */,
+ U64_C(0xf1ac2796e42ab7d9) /* 288 */, U64_C(0x40a3a7d7fcd2ebac) /* 289 */,
+ U64_C(0x1056136d0afbbcc5) /* 290 */, U64_C(0x7889e1dd9a6d0c85) /* 291 */,
+ U64_C(0xd33525782a7974aa) /* 292 */, U64_C(0xa7e25d09078ac09b) /* 293 */,
+ U64_C(0xbd4138b3eac6edd0) /* 294 */, U64_C(0x920abfbe71eb9e70) /* 295 */,
+ U64_C(0xa2a5d0f54fc2625c) /* 296 */, U64_C(0xc054e36b0b1290a3) /* 297 */,
+ U64_C(0xf6dd59ff62fe932b) /* 298 */, U64_C(0x3537354511a8ac7d) /* 299 */,
+ U64_C(0xca845e9172fadcd4) /* 300 */, U64_C(0x84f82b60329d20dc) /* 301 */,
+ U64_C(0x79c62ce1cd672f18) /* 302 */, U64_C(0x8b09a2add124642c) /* 303 */,
+ U64_C(0xd0c1e96a19d9e726) /* 304 */, U64_C(0x5a786a9b4ba9500c) /* 305 */,
+ U64_C(0x0e020336634c43f3) /* 306 */, U64_C(0xc17b474aeb66d822) /* 307 */,
+ U64_C(0x6a731ae3ec9baac2) /* 308 */, U64_C(0x8226667ae0840258) /* 309 */,
+ U64_C(0x67d4567691caeca5) /* 310 */, U64_C(0x1d94155c4875adb5) /* 311 */,
+ U64_C(0x6d00fd985b813fdf) /* 312 */, U64_C(0x51286efcb774cd06) /* 313 */,
+ U64_C(0x5e8834471fa744af) /* 314 */, U64_C(0xf72ca0aee761ae2e) /* 315 */,
+ U64_C(0xbe40e4cdaee8e09a) /* 316 */, U64_C(0xe9970bbb5118f665) /* 317 */,
+ U64_C(0x726e4beb33df1964) /* 318 */, U64_C(0x703b000729199762) /* 319 */,
+ U64_C(0x4631d816f5ef30a7) /* 320 */, U64_C(0xb880b5b51504a6be) /* 321 */,
+ U64_C(0x641793c37ed84b6c) /* 322 */, U64_C(0x7b21ed77f6e97d96) /* 323 */,
+ U64_C(0x776306312ef96b73) /* 324 */, U64_C(0xae528948e86ff3f4) /* 325 */,
+ U64_C(0x53dbd7f286a3f8f8) /* 326 */, U64_C(0x16cadce74cfc1063) /* 327 */,
+ U64_C(0x005c19bdfa52c6dd) /* 328 */, U64_C(0x68868f5d64d46ad3) /* 329 */,
+ U64_C(0x3a9d512ccf1e186a) /* 330 */, U64_C(0x367e62c2385660ae) /* 331 */,
+ U64_C(0xe359e7ea77dcb1d7) /* 332 */, U64_C(0x526c0773749abe6e) /* 333 */,
+ U64_C(0x735ae5f9d09f734b) /* 334 */, U64_C(0x493fc7cc8a558ba8) /* 335 */,
+ U64_C(0xb0b9c1533041ab45) /* 336 */, U64_C(0x321958ba470a59bd) /* 337 */,
+ U64_C(0x852db00b5f46c393) /* 338 */, U64_C(0x91209b2bd336b0e5) /* 339 */,
+ U64_C(0x6e604f7d659ef19f) /* 340 */, U64_C(0xb99a8ae2782ccb24) /* 341 */,
+ U64_C(0xccf52ab6c814c4c7) /* 342 */, U64_C(0x4727d9afbe11727b) /* 343 */,
+ U64_C(0x7e950d0c0121b34d) /* 344 */, U64_C(0x756f435670ad471f) /* 345 */,
+ U64_C(0xf5add442615a6849) /* 346 */, U64_C(0x4e87e09980b9957a) /* 347 */,
+ U64_C(0x2acfa1df50aee355) /* 348 */, U64_C(0xd898263afd2fd556) /* 349 */,
+ U64_C(0xc8f4924dd80c8fd6) /* 350 */, U64_C(0xcf99ca3d754a173a) /* 351 */,
+ U64_C(0xfe477bacaf91bf3c) /* 352 */, U64_C(0xed5371f6d690c12d) /* 353 */,
+ U64_C(0x831a5c285e687094) /* 354 */, U64_C(0xc5d3c90a3708a0a4) /* 355 */,
+ U64_C(0x0f7f903717d06580) /* 356 */, U64_C(0x19f9bb13b8fdf27f) /* 357 */,
+ U64_C(0xb1bd6f1b4d502843) /* 358 */, U64_C(0x1c761ba38fff4012) /* 359 */,
+ U64_C(0x0d1530c4e2e21f3b) /* 360 */, U64_C(0x8943ce69a7372c8a) /* 361 */,
+ U64_C(0xe5184e11feb5ce66) /* 362 */, U64_C(0x618bdb80bd736621) /* 363 */,
+ U64_C(0x7d29bad68b574d0b) /* 364 */, U64_C(0x81bb613e25e6fe5b) /* 365 */,
+ U64_C(0x071c9c10bc07913f) /* 366 */, U64_C(0xc7beeb7909ac2d97) /* 367 */,
+ U64_C(0xc3e58d353bc5d757) /* 368 */, U64_C(0xeb017892f38f61e8) /* 369 */,
+ U64_C(0xd4effb9c9b1cc21a) /* 370 */, U64_C(0x99727d26f494f7ab) /* 371 */,
+ U64_C(0xa3e063a2956b3e03) /* 372 */, U64_C(0x9d4a8b9a4aa09c30) /* 373 */,
+ U64_C(0x3f6ab7d500090fb4) /* 374 */, U64_C(0x9cc0f2a057268ac0) /* 375 */,
+ U64_C(0x3dee9d2dedbf42d1) /* 376 */, U64_C(0x330f49c87960a972) /* 377 */,
+ U64_C(0xc6b2720287421b41) /* 378 */, U64_C(0x0ac59ec07c00369c) /* 379 */,
+ U64_C(0xef4eac49cb353425) /* 380 */, U64_C(0xf450244eef0129d8) /* 381 */,
+ U64_C(0x8acc46e5caf4deb6) /* 382 */, U64_C(0x2ffeab63989263f7) /* 383 */,
+ U64_C(0x8f7cb9fe5d7a4578) /* 384 */, U64_C(0x5bd8f7644e634635) /* 385 */,
+ U64_C(0x427a7315bf2dc900) /* 386 */, U64_C(0x17d0c4aa2125261c) /* 387 */,
+ U64_C(0x3992486c93518e50) /* 388 */, U64_C(0xb4cbfee0a2d7d4c3) /* 389 */,
+ U64_C(0x7c75d6202c5ddd8d) /* 390 */, U64_C(0xdbc295d8e35b6c61) /* 391 */,
+ U64_C(0x60b369d302032b19) /* 392 */, U64_C(0xce42685fdce44132) /* 393 */,
+ U64_C(0x06f3ddb9ddf65610) /* 394 */, U64_C(0x8ea4d21db5e148f0) /* 395 */,
+ U64_C(0x20b0fce62fcd496f) /* 396 */, U64_C(0x2c1b912358b0ee31) /* 397 */,
+ U64_C(0xb28317b818f5a308) /* 398 */, U64_C(0xa89c1e189ca6d2cf) /* 399 */,
+ U64_C(0x0c6b18576aaadbc8) /* 400 */, U64_C(0xb65deaa91299fae3) /* 401 */,
+ U64_C(0xfb2b794b7f1027e7) /* 402 */, U64_C(0x04e4317f443b5beb) /* 403 */,
+ U64_C(0x4b852d325939d0a6) /* 404 */, U64_C(0xd5ae6beefb207ffc) /* 405 */,
+ U64_C(0x309682b281c7d374) /* 406 */, U64_C(0xbae309a194c3b475) /* 407 */,
+ U64_C(0x8cc3f97b13b49f05) /* 408 */, U64_C(0x98a9422ff8293967) /* 409 */,
+ U64_C(0x244b16b01076ff7c) /* 410 */, U64_C(0xf8bf571c663d67ee) /* 411 */,
+ U64_C(0x1f0d6758eee30da1) /* 412 */, U64_C(0xc9b611d97adeb9b7) /* 413 */,
+ U64_C(0xb7afd5887b6c57a2) /* 414 */, U64_C(0x6290ae846b984fe1) /* 415 */,
+ U64_C(0x94df4cdeacc1a5fd) /* 416 */, U64_C(0x058a5bd1c5483aff) /* 417 */,
+ U64_C(0x63166cc142ba3c37) /* 418 */, U64_C(0x8db8526eb2f76f40) /* 419 */,
+ U64_C(0xe10880036f0d6d4e) /* 420 */, U64_C(0x9e0523c9971d311d) /* 421 */,
+ U64_C(0x45ec2824cc7cd691) /* 422 */, U64_C(0x575b8359e62382c9) /* 423 */,
+ U64_C(0xfa9e400dc4889995) /* 424 */, U64_C(0xd1823ecb45721568) /* 425 */,
+ U64_C(0xdafd983b8206082f) /* 426 */, U64_C(0xaa7d29082386a8cb) /* 427 */,
+ U64_C(0x269fcd4403b87588) /* 428 */, U64_C(0x1b91f5f728bdd1e0) /* 429 */,
+ U64_C(0xe4669f39040201f6) /* 430 */, U64_C(0x7a1d7c218cf04ade) /* 431 */,
+ U64_C(0x65623c29d79ce5ce) /* 432 */, U64_C(0x2368449096c00bb1) /* 433 */,
+ U64_C(0xab9bf1879da503ba) /* 434 */, U64_C(0xbc23ecb1a458058e) /* 435 */,
+ U64_C(0x9a58df01bb401ecc) /* 436 */, U64_C(0xa070e868a85f143d) /* 437 */,
+ U64_C(0x4ff188307df2239e) /* 438 */, U64_C(0x14d565b41a641183) /* 439 */,
+ U64_C(0xee13337452701602) /* 440 */, U64_C(0x950e3dcf3f285e09) /* 441 */,
+ U64_C(0x59930254b9c80953) /* 442 */, U64_C(0x3bf299408930da6d) /* 443 */,
+ U64_C(0xa955943f53691387) /* 444 */, U64_C(0xa15edecaa9cb8784) /* 445 */,
+ U64_C(0x29142127352be9a0) /* 446 */, U64_C(0x76f0371fff4e7afb) /* 447 */,
+ U64_C(0x0239f450274f2228) /* 448 */, U64_C(0xbb073af01d5e868b) /* 449 */,
+ U64_C(0xbfc80571c10e96c1) /* 450 */, U64_C(0xd267088568222e23) /* 451 */,
+ U64_C(0x9671a3d48e80b5b0) /* 452 */, U64_C(0x55b5d38ae193bb81) /* 453 */,
+ U64_C(0x693ae2d0a18b04b8) /* 454 */, U64_C(0x5c48b4ecadd5335f) /* 455 */,
+ U64_C(0xfd743b194916a1ca) /* 456 */, U64_C(0x2577018134be98c4) /* 457 */,
+ U64_C(0xe77987e83c54a4ad) /* 458 */, U64_C(0x28e11014da33e1b9) /* 459 */,
+ U64_C(0x270cc59e226aa213) /* 460 */, U64_C(0x71495f756d1a5f60) /* 461 */,
+ U64_C(0x9be853fb60afef77) /* 462 */, U64_C(0xadc786a7f7443dbf) /* 463 */,
+ U64_C(0x0904456173b29a82) /* 464 */, U64_C(0x58bc7a66c232bd5e) /* 465 */,
+ U64_C(0xf306558c673ac8b2) /* 466 */, U64_C(0x41f639c6b6c9772a) /* 467 */,
+ U64_C(0x216defe99fda35da) /* 468 */, U64_C(0x11640cc71c7be615) /* 469 */,
+ U64_C(0x93c43694565c5527) /* 470 */, U64_C(0xea038e6246777839) /* 471 */,
+ U64_C(0xf9abf3ce5a3e2469) /* 472 */, U64_C(0x741e768d0fd312d2) /* 473 */,
+ U64_C(0x0144b883ced652c6) /* 474 */, U64_C(0xc20b5a5ba33f8552) /* 475 */,
+ U64_C(0x1ae69633c3435a9d) /* 476 */, U64_C(0x97a28ca4088cfdec) /* 477 */,
+ U64_C(0x8824a43c1e96f420) /* 478 */, U64_C(0x37612fa66eeea746) /* 479 */,
+ U64_C(0x6b4cb165f9cf0e5a) /* 480 */, U64_C(0x43aa1c06a0abfb4a) /* 481 */,
+ U64_C(0x7f4dc26ff162796b) /* 482 */, U64_C(0x6cbacc8e54ed9b0f) /* 483 */,
+ U64_C(0xa6b7ffefd2bb253e) /* 484 */, U64_C(0x2e25bc95b0a29d4f) /* 485 */,
+ U64_C(0x86d6a58bdef1388c) /* 486 */, U64_C(0xded74ac576b6f054) /* 487 */,
+ U64_C(0x8030bdbc2b45805d) /* 488 */, U64_C(0x3c81af70e94d9289) /* 489 */,
+ U64_C(0x3eff6dda9e3100db) /* 490 */, U64_C(0xb38dc39fdfcc8847) /* 491 */,
+ U64_C(0x123885528d17b87e) /* 492 */, U64_C(0xf2da0ed240b1b642) /* 493 */,
+ U64_C(0x44cefadcd54bf9a9) /* 494 */, U64_C(0x1312200e433c7ee6) /* 495 */,
+ U64_C(0x9ffcc84f3a78c748) /* 496 */, U64_C(0xf0cd1f72248576bb) /* 497 */,
+ U64_C(0xec6974053638cfe4) /* 498 */, U64_C(0x2ba7b67c0cec4e4c) /* 499 */,
+ U64_C(0xac2f4df3e5ce32ed) /* 500 */, U64_C(0xcb33d14326ea4c11) /* 501 */,
+ U64_C(0xa4e9044cc77e58bc) /* 502 */, U64_C(0x5f513293d934fcef) /* 503 */,
+ U64_C(0x5dc9645506e55444) /* 504 */, U64_C(0x50de418f317de40a) /* 505 */,
+ U64_C(0x388cb31a69dde259) /* 506 */, U64_C(0x2db4a83455820a86) /* 507 */,
+ U64_C(0x9010a91e84711ae9) /* 508 */, U64_C(0x4df7f0b7b1498371) /* 509 */,
+ U64_C(0xd62a2eabc0977179) /* 510 */, U64_C(0x22fac097aa8d5c0e) /* 511 */
+};
+static u64 sbox3[256] = {
+ U64_C(0xf49fcc2ff1daf39b) /* 512 */, U64_C(0x487fd5c66ff29281) /* 513 */,
+ U64_C(0xe8a30667fcdca83f) /* 514 */, U64_C(0x2c9b4be3d2fcce63) /* 515 */,
+ U64_C(0xda3ff74b93fbbbc2) /* 516 */, U64_C(0x2fa165d2fe70ba66) /* 517 */,
+ U64_C(0xa103e279970e93d4) /* 518 */, U64_C(0xbecdec77b0e45e71) /* 519 */,
+ U64_C(0xcfb41e723985e497) /* 520 */, U64_C(0xb70aaa025ef75017) /* 521 */,
+ U64_C(0xd42309f03840b8e0) /* 522 */, U64_C(0x8efc1ad035898579) /* 523 */,
+ U64_C(0x96c6920be2b2abc5) /* 524 */, U64_C(0x66af4163375a9172) /* 525 */,
+ U64_C(0x2174abdcca7127fb) /* 526 */, U64_C(0xb33ccea64a72ff41) /* 527 */,
+ U64_C(0xf04a4933083066a5) /* 528 */, U64_C(0x8d970acdd7289af5) /* 529 */,
+ U64_C(0x8f96e8e031c8c25e) /* 530 */, U64_C(0xf3fec02276875d47) /* 531 */,
+ U64_C(0xec7bf310056190dd) /* 532 */, U64_C(0xf5adb0aebb0f1491) /* 533 */,
+ U64_C(0x9b50f8850fd58892) /* 534 */, U64_C(0x4975488358b74de8) /* 535 */,
+ U64_C(0xa3354ff691531c61) /* 536 */, U64_C(0x0702bbe481d2c6ee) /* 537 */,
+ U64_C(0x89fb24057deded98) /* 538 */, U64_C(0xac3075138596e902) /* 539 */,
+ U64_C(0x1d2d3580172772ed) /* 540 */, U64_C(0xeb738fc28e6bc30d) /* 541 */,
+ U64_C(0x5854ef8f63044326) /* 542 */, U64_C(0x9e5c52325add3bbe) /* 543 */,
+ U64_C(0x90aa53cf325c4623) /* 544 */, U64_C(0xc1d24d51349dd067) /* 545 */,
+ U64_C(0x2051cfeea69ea624) /* 546 */, U64_C(0x13220f0a862e7e4f) /* 547 */,
+ U64_C(0xce39399404e04864) /* 548 */, U64_C(0xd9c42ca47086fcb7) /* 549 */,
+ U64_C(0x685ad2238a03e7cc) /* 550 */, U64_C(0x066484b2ab2ff1db) /* 551 */,
+ U64_C(0xfe9d5d70efbf79ec) /* 552 */, U64_C(0x5b13b9dd9c481854) /* 553 */,
+ U64_C(0x15f0d475ed1509ad) /* 554 */, U64_C(0x0bebcd060ec79851) /* 555 */,
+ U64_C(0xd58c6791183ab7f8) /* 556 */, U64_C(0xd1187c5052f3eee4) /* 557 */,
+ U64_C(0xc95d1192e54e82ff) /* 558 */, U64_C(0x86eea14cb9ac6ca2) /* 559 */,
+ U64_C(0x3485beb153677d5d) /* 560 */, U64_C(0xdd191d781f8c492a) /* 561 */,
+ U64_C(0xf60866baa784ebf9) /* 562 */, U64_C(0x518f643ba2d08c74) /* 563 */,
+ U64_C(0x8852e956e1087c22) /* 564 */, U64_C(0xa768cb8dc410ae8d) /* 565 */,
+ U64_C(0x38047726bfec8e1a) /* 566 */, U64_C(0xa67738b4cd3b45aa) /* 567 */,
+ U64_C(0xad16691cec0dde19) /* 568 */, U64_C(0xc6d4319380462e07) /* 569 */,
+ U64_C(0xc5a5876d0ba61938) /* 570 */, U64_C(0x16b9fa1fa58fd840) /* 571 */,
+ U64_C(0x188ab1173ca74f18) /* 572 */, U64_C(0xabda2f98c99c021f) /* 573 */,
+ U64_C(0x3e0580ab134ae816) /* 574 */, U64_C(0x5f3b05b773645abb) /* 575 */,
+ U64_C(0x2501a2be5575f2f6) /* 576 */, U64_C(0x1b2f74004e7e8ba9) /* 577 */,
+ U64_C(0x1cd7580371e8d953) /* 578 */, U64_C(0x7f6ed89562764e30) /* 579 */,
+ U64_C(0xb15926ff596f003d) /* 580 */, U64_C(0x9f65293da8c5d6b9) /* 581 */,
+ U64_C(0x6ecef04dd690f84c) /* 582 */, U64_C(0x4782275fff33af88) /* 583 */,
+ U64_C(0xe41433083f820801) /* 584 */, U64_C(0xfd0dfe409a1af9b5) /* 585 */,
+ U64_C(0x4325a3342cdb396b) /* 586 */, U64_C(0x8ae77e62b301b252) /* 587 */,
+ U64_C(0xc36f9e9f6655615a) /* 588 */, U64_C(0x85455a2d92d32c09) /* 589 */,
+ U64_C(0xf2c7dea949477485) /* 590 */, U64_C(0x63cfb4c133a39eba) /* 591 */,
+ U64_C(0x83b040cc6ebc5462) /* 592 */, U64_C(0x3b9454c8fdb326b0) /* 593 */,
+ U64_C(0x56f56a9e87ffd78c) /* 594 */, U64_C(0x2dc2940d99f42bc6) /* 595 */,
+ U64_C(0x98f7df096b096e2d) /* 596 */, U64_C(0x19a6e01e3ad852bf) /* 597 */,
+ U64_C(0x42a99ccbdbd4b40b) /* 598 */, U64_C(0xa59998af45e9c559) /* 599 */,
+ U64_C(0x366295e807d93186) /* 600 */, U64_C(0x6b48181bfaa1f773) /* 601 */,
+ U64_C(0x1fec57e2157a0a1d) /* 602 */, U64_C(0x4667446af6201ad5) /* 603 */,
+ U64_C(0xe615ebcacfb0f075) /* 604 */, U64_C(0xb8f31f4f68290778) /* 605 */,
+ U64_C(0x22713ed6ce22d11e) /* 606 */, U64_C(0x3057c1a72ec3c93b) /* 607 */,
+ U64_C(0xcb46acc37c3f1f2f) /* 608 */, U64_C(0xdbb893fd02aaf50e) /* 609 */,
+ U64_C(0x331fd92e600b9fcf) /* 610 */, U64_C(0xa498f96148ea3ad6) /* 611 */,
+ U64_C(0xa8d8426e8b6a83ea) /* 612 */, U64_C(0xa089b274b7735cdc) /* 613 */,
+ U64_C(0x87f6b3731e524a11) /* 614 */, U64_C(0x118808e5cbc96749) /* 615 */,
+ U64_C(0x9906e4c7b19bd394) /* 616 */, U64_C(0xafed7f7e9b24a20c) /* 617 */,
+ U64_C(0x6509eadeeb3644a7) /* 618 */, U64_C(0x6c1ef1d3e8ef0ede) /* 619 */,
+ U64_C(0xb9c97d43e9798fb4) /* 620 */, U64_C(0xa2f2d784740c28a3) /* 621 */,
+ U64_C(0x7b8496476197566f) /* 622 */, U64_C(0x7a5be3e6b65f069d) /* 623 */,
+ U64_C(0xf96330ed78be6f10) /* 624 */, U64_C(0xeee60de77a076a15) /* 625 */,
+ U64_C(0x2b4bee4aa08b9bd0) /* 626 */, U64_C(0x6a56a63ec7b8894e) /* 627 */,
+ U64_C(0x02121359ba34fef4) /* 628 */, U64_C(0x4cbf99f8283703fc) /* 629 */,
+ U64_C(0x398071350caf30c8) /* 630 */, U64_C(0xd0a77a89f017687a) /* 631 */,
+ U64_C(0xf1c1a9eb9e423569) /* 632 */, U64_C(0x8c7976282dee8199) /* 633 */,
+ U64_C(0x5d1737a5dd1f7abd) /* 634 */, U64_C(0x4f53433c09a9fa80) /* 635 */,
+ U64_C(0xfa8b0c53df7ca1d9) /* 636 */, U64_C(0x3fd9dcbc886ccb77) /* 637 */,
+ U64_C(0xc040917ca91b4720) /* 638 */, U64_C(0x7dd00142f9d1dcdf) /* 639 */,
+ U64_C(0x8476fc1d4f387b58) /* 640 */, U64_C(0x23f8e7c5f3316503) /* 641 */,
+ U64_C(0x032a2244e7e37339) /* 642 */, U64_C(0x5c87a5d750f5a74b) /* 643 */,
+ U64_C(0x082b4cc43698992e) /* 644 */, U64_C(0xdf917becb858f63c) /* 645 */,
+ U64_C(0x3270b8fc5bf86dda) /* 646 */, U64_C(0x10ae72bb29b5dd76) /* 647 */,
+ U64_C(0x576ac94e7700362b) /* 648 */, U64_C(0x1ad112dac61efb8f) /* 649 */,
+ U64_C(0x691bc30ec5faa427) /* 650 */, U64_C(0xff246311cc327143) /* 651 */,
+ U64_C(0x3142368e30e53206) /* 652 */, U64_C(0x71380e31e02ca396) /* 653 */,
+ U64_C(0x958d5c960aad76f1) /* 654 */, U64_C(0xf8d6f430c16da536) /* 655 */,
+ U64_C(0xc8ffd13f1be7e1d2) /* 656 */, U64_C(0x7578ae66004ddbe1) /* 657 */,
+ U64_C(0x05833f01067be646) /* 658 */, U64_C(0xbb34b5ad3bfe586d) /* 659 */,
+ U64_C(0x095f34c9a12b97f0) /* 660 */, U64_C(0x247ab64525d60ca8) /* 661 */,
+ U64_C(0xdcdbc6f3017477d1) /* 662 */, U64_C(0x4a2e14d4decad24d) /* 663 */,
+ U64_C(0xbdb5e6d9be0a1eeb) /* 664 */, U64_C(0x2a7e70f7794301ab) /* 665 */,
+ U64_C(0xdef42d8a270540fd) /* 666 */, U64_C(0x01078ec0a34c22c1) /* 667 */,
+ U64_C(0xe5de511af4c16387) /* 668 */, U64_C(0x7ebb3a52bd9a330a) /* 669 */,
+ U64_C(0x77697857aa7d6435) /* 670 */, U64_C(0x004e831603ae4c32) /* 671 */,
+ U64_C(0xe7a21020ad78e312) /* 672 */, U64_C(0x9d41a70c6ab420f2) /* 673 */,
+ U64_C(0x28e06c18ea1141e6) /* 674 */, U64_C(0xd2b28cbd984f6b28) /* 675 */,
+ U64_C(0x26b75f6c446e9d83) /* 676 */, U64_C(0xba47568c4d418d7f) /* 677 */,
+ U64_C(0xd80badbfe6183d8e) /* 678 */, U64_C(0x0e206d7f5f166044) /* 679 */,
+ U64_C(0xe258a43911cbca3e) /* 680 */, U64_C(0x723a1746b21dc0bc) /* 681 */,
+ U64_C(0xc7caa854f5d7cdd3) /* 682 */, U64_C(0x7cac32883d261d9c) /* 683 */,
+ U64_C(0x7690c26423ba942c) /* 684 */, U64_C(0x17e55524478042b8) /* 685 */,
+ U64_C(0xe0be477656a2389f) /* 686 */, U64_C(0x4d289b5e67ab2da0) /* 687 */,
+ U64_C(0x44862b9c8fbbfd31) /* 688 */, U64_C(0xb47cc8049d141365) /* 689 */,
+ U64_C(0x822c1b362b91c793) /* 690 */, U64_C(0x4eb14655fb13dfd8) /* 691 */,
+ U64_C(0x1ecbba0714e2a97b) /* 692 */, U64_C(0x6143459d5cde5f14) /* 693 */,
+ U64_C(0x53a8fbf1d5f0ac89) /* 694 */, U64_C(0x97ea04d81c5e5b00) /* 695 */,
+ U64_C(0x622181a8d4fdb3f3) /* 696 */, U64_C(0xe9bcd341572a1208) /* 697 */,
+ U64_C(0x1411258643cce58a) /* 698 */, U64_C(0x9144c5fea4c6e0a4) /* 699 */,
+ U64_C(0x0d33d06565cf620f) /* 700 */, U64_C(0x54a48d489f219ca1) /* 701 */,
+ U64_C(0xc43e5eac6d63c821) /* 702 */, U64_C(0xa9728b3a72770daf) /* 703 */,
+ U64_C(0xd7934e7b20df87ef) /* 704 */, U64_C(0xe35503b61a3e86e5) /* 705 */,
+ U64_C(0xcae321fbc819d504) /* 706 */, U64_C(0x129a50b3ac60bfa6) /* 707 */,
+ U64_C(0xcd5e68ea7e9fb6c3) /* 708 */, U64_C(0xb01c90199483b1c7) /* 709 */,
+ U64_C(0x3de93cd5c295376c) /* 710 */, U64_C(0xaed52edf2ab9ad13) /* 711 */,
+ U64_C(0x2e60f512c0a07884) /* 712 */, U64_C(0xbc3d86a3e36210c9) /* 713 */,
+ U64_C(0x35269d9b163951ce) /* 714 */, U64_C(0x0c7d6e2ad0cdb5fa) /* 715 */,
+ U64_C(0x59e86297d87f5733) /* 716 */, U64_C(0x298ef221898db0e7) /* 717 */,
+ U64_C(0x55000029d1a5aa7e) /* 718 */, U64_C(0x8bc08ae1b5061b45) /* 719 */,
+ U64_C(0xc2c31c2b6c92703a) /* 720 */, U64_C(0x94cc596baf25ef42) /* 721 */,
+ U64_C(0x0a1d73db22540456) /* 722 */, U64_C(0x04b6a0f9d9c4179a) /* 723 */,
+ U64_C(0xeffdafa2ae3d3c60) /* 724 */, U64_C(0xf7c8075bb49496c4) /* 725 */,
+ U64_C(0x9cc5c7141d1cd4e3) /* 726 */, U64_C(0x78bd1638218e5534) /* 727 */,
+ U64_C(0xb2f11568f850246a) /* 728 */, U64_C(0xedfabcfa9502bc29) /* 729 */,
+ U64_C(0x796ce5f2da23051b) /* 730 */, U64_C(0xaae128b0dc93537c) /* 731 */,
+ U64_C(0x3a493da0ee4b29ae) /* 732 */, U64_C(0xb5df6b2c416895d7) /* 733 */,
+ U64_C(0xfcabbd25122d7f37) /* 734 */, U64_C(0x70810b58105dc4b1) /* 735 */,
+ U64_C(0xe10fdd37f7882a90) /* 736 */, U64_C(0x524dcab5518a3f5c) /* 737 */,
+ U64_C(0x3c9e85878451255b) /* 738 */, U64_C(0x4029828119bd34e2) /* 739 */,
+ U64_C(0x74a05b6f5d3ceccb) /* 740 */, U64_C(0xb610021542e13eca) /* 741 */,
+ U64_C(0x0ff979d12f59e2ac) /* 742 */, U64_C(0x6037da27e4f9cc50) /* 743 */,
+ U64_C(0x5e92975a0df1847d) /* 744 */, U64_C(0xd66de190d3e623fe) /* 745 */,
+ U64_C(0x5032d6b87b568048) /* 746 */, U64_C(0x9a36b7ce8235216e) /* 747 */,
+ U64_C(0x80272a7a24f64b4a) /* 748 */, U64_C(0x93efed8b8c6916f7) /* 749 */,
+ U64_C(0x37ddbff44cce1555) /* 750 */, U64_C(0x4b95db5d4b99bd25) /* 751 */,
+ U64_C(0x92d3fda169812fc0) /* 752 */, U64_C(0xfb1a4a9a90660bb6) /* 753 */,
+ U64_C(0x730c196946a4b9b2) /* 754 */, U64_C(0x81e289aa7f49da68) /* 755 */,
+ U64_C(0x64669a0f83b1a05f) /* 756 */, U64_C(0x27b3ff7d9644f48b) /* 757 */,
+ U64_C(0xcc6b615c8db675b3) /* 758 */, U64_C(0x674f20b9bcebbe95) /* 759 */,
+ U64_C(0x6f31238275655982) /* 760 */, U64_C(0x5ae488713e45cf05) /* 761 */,
+ U64_C(0xbf619f9954c21157) /* 762 */, U64_C(0xeabac46040a8eae9) /* 763 */,
+ U64_C(0x454c6fe9f2c0c1cd) /* 764 */, U64_C(0x419cf6496412691c) /* 765 */,
+ U64_C(0xd3dc3bef265b0f70) /* 766 */, U64_C(0x6d0e60f5c3578a9e) /* 767 */
+};
+static u64 sbox4[256] = {
+ U64_C(0x5b0e608526323c55) /* 768 */, U64_C(0x1a46c1a9fa1b59f5) /* 769 */,
+ U64_C(0xa9e245a17c4c8ffa) /* 770 */, U64_C(0x65ca5159db2955d7) /* 771 */,
+ U64_C(0x05db0a76ce35afc2) /* 772 */, U64_C(0x81eac77ea9113d45) /* 773 */,
+ U64_C(0x528ef88ab6ac0a0d) /* 774 */, U64_C(0xa09ea253597be3ff) /* 775 */,
+ U64_C(0x430ddfb3ac48cd56) /* 776 */, U64_C(0xc4b3a67af45ce46f) /* 777 */,
+ U64_C(0x4ececfd8fbe2d05e) /* 778 */, U64_C(0x3ef56f10b39935f0) /* 779 */,
+ U64_C(0x0b22d6829cd619c6) /* 780 */, U64_C(0x17fd460a74df2069) /* 781 */,
+ U64_C(0x6cf8cc8e8510ed40) /* 782 */, U64_C(0xd6c824bf3a6ecaa7) /* 783 */,
+ U64_C(0x61243d581a817049) /* 784 */, U64_C(0x048bacb6bbc163a2) /* 785 */,
+ U64_C(0xd9a38ac27d44cc32) /* 786 */, U64_C(0x7fddff5baaf410ab) /* 787 */,
+ U64_C(0xad6d495aa804824b) /* 788 */, U64_C(0xe1a6a74f2d8c9f94) /* 789 */,
+ U64_C(0xd4f7851235dee8e3) /* 790 */, U64_C(0xfd4b7f886540d893) /* 791 */,
+ U64_C(0x247c20042aa4bfda) /* 792 */, U64_C(0x096ea1c517d1327c) /* 793 */,
+ U64_C(0xd56966b4361a6685) /* 794 */, U64_C(0x277da5c31221057d) /* 795 */,
+ U64_C(0x94d59893a43acff7) /* 796 */, U64_C(0x64f0c51ccdc02281) /* 797 */,
+ U64_C(0x3d33bcc4ff6189db) /* 798 */, U64_C(0xe005cb184ce66af1) /* 799 */,
+ U64_C(0xff5ccd1d1db99bea) /* 800 */, U64_C(0xb0b854a7fe42980f) /* 801 */,
+ U64_C(0x7bd46a6a718d4b9f) /* 802 */, U64_C(0xd10fa8cc22a5fd8c) /* 803 */,
+ U64_C(0xd31484952be4bd31) /* 804 */, U64_C(0xc7fa975fcb243847) /* 805 */,
+ U64_C(0x4886ed1e5846c407) /* 806 */, U64_C(0x28cddb791eb70b04) /* 807 */,
+ U64_C(0xc2b00be2f573417f) /* 808 */, U64_C(0x5c9590452180f877) /* 809 */,
+ U64_C(0x7a6bddfff370eb00) /* 810 */, U64_C(0xce509e38d6d9d6a4) /* 811 */,
+ U64_C(0xebeb0f00647fa702) /* 812 */, U64_C(0x1dcc06cf76606f06) /* 813 */,
+ U64_C(0xe4d9f28ba286ff0a) /* 814 */, U64_C(0xd85a305dc918c262) /* 815 */,
+ U64_C(0x475b1d8732225f54) /* 816 */, U64_C(0x2d4fb51668ccb5fe) /* 817 */,
+ U64_C(0xa679b9d9d72bba20) /* 818 */, U64_C(0x53841c0d912d43a5) /* 819 */,
+ U64_C(0x3b7eaa48bf12a4e8) /* 820 */, U64_C(0x781e0e47f22f1ddf) /* 821 */,
+ U64_C(0xeff20ce60ab50973) /* 822 */, U64_C(0x20d261d19dffb742) /* 823 */,
+ U64_C(0x16a12b03062a2e39) /* 824 */, U64_C(0x1960eb2239650495) /* 825 */,
+ U64_C(0x251c16fed50eb8b8) /* 826 */, U64_C(0x9ac0c330f826016e) /* 827 */,
+ U64_C(0xed152665953e7671) /* 828 */, U64_C(0x02d63194a6369570) /* 829 */,
+ U64_C(0x5074f08394b1c987) /* 830 */, U64_C(0x70ba598c90b25ce1) /* 831 */,
+ U64_C(0x794a15810b9742f6) /* 832 */, U64_C(0x0d5925e9fcaf8c6c) /* 833 */,
+ U64_C(0x3067716cd868744e) /* 834 */, U64_C(0x910ab077e8d7731b) /* 835 */,
+ U64_C(0x6a61bbdb5ac42f61) /* 836 */, U64_C(0x93513efbf0851567) /* 837 */,
+ U64_C(0xf494724b9e83e9d5) /* 838 */, U64_C(0xe887e1985c09648d) /* 839 */,
+ U64_C(0x34b1d3c675370cfd) /* 840 */, U64_C(0xdc35e433bc0d255d) /* 841 */,
+ U64_C(0xd0aab84234131be0) /* 842 */, U64_C(0x08042a50b48b7eaf) /* 843 */,
+ U64_C(0x9997c4ee44a3ab35) /* 844 */, U64_C(0x829a7b49201799d0) /* 845 */,
+ U64_C(0x263b8307b7c54441) /* 846 */, U64_C(0x752f95f4fd6a6ca6) /* 847 */,
+ U64_C(0x927217402c08c6e5) /* 848 */, U64_C(0x2a8ab754a795d9ee) /* 849 */,
+ U64_C(0xa442f7552f72943d) /* 850 */, U64_C(0x2c31334e19781208) /* 851 */,
+ U64_C(0x4fa98d7ceaee6291) /* 852 */, U64_C(0x55c3862f665db309) /* 853 */,
+ U64_C(0xbd0610175d53b1f3) /* 854 */, U64_C(0x46fe6cb840413f27) /* 855 */,
+ U64_C(0x3fe03792df0cfa59) /* 856 */, U64_C(0xcfe700372eb85e8f) /* 857 */,
+ U64_C(0xa7be29e7adbce118) /* 858 */, U64_C(0xe544ee5cde8431dd) /* 859 */,
+ U64_C(0x8a781b1b41f1873e) /* 860 */, U64_C(0xa5c94c78a0d2f0e7) /* 861 */,
+ U64_C(0x39412e2877b60728) /* 862 */, U64_C(0xa1265ef3afc9a62c) /* 863 */,
+ U64_C(0xbcc2770c6a2506c5) /* 864 */, U64_C(0x3ab66dd5dce1ce12) /* 865 */,
+ U64_C(0xe65499d04a675b37) /* 866 */, U64_C(0x7d8f523481bfd216) /* 867 */,
+ U64_C(0x0f6f64fcec15f389) /* 868 */, U64_C(0x74efbe618b5b13c8) /* 869 */,
+ U64_C(0xacdc82b714273e1d) /* 870 */, U64_C(0xdd40bfe003199d17) /* 871 */,
+ U64_C(0x37e99257e7e061f8) /* 872 */, U64_C(0xfa52626904775aaa) /* 873 */,
+ U64_C(0x8bbbf63a463d56f9) /* 874 */, U64_C(0xf0013f1543a26e64) /* 875 */,
+ U64_C(0xa8307e9f879ec898) /* 876 */, U64_C(0xcc4c27a4150177cc) /* 877 */,
+ U64_C(0x1b432f2cca1d3348) /* 878 */, U64_C(0xde1d1f8f9f6fa013) /* 879 */,
+ U64_C(0x606602a047a7ddd6) /* 880 */, U64_C(0xd237ab64cc1cb2c7) /* 881 */,
+ U64_C(0x9b938e7225fcd1d3) /* 882 */, U64_C(0xec4e03708e0ff476) /* 883 */,
+ U64_C(0xfeb2fbda3d03c12d) /* 884 */, U64_C(0xae0bced2ee43889a) /* 885 */,
+ U64_C(0x22cb8923ebfb4f43) /* 886 */, U64_C(0x69360d013cf7396d) /* 887 */,
+ U64_C(0x855e3602d2d4e022) /* 888 */, U64_C(0x073805bad01f784c) /* 889 */,
+ U64_C(0x33e17a133852f546) /* 890 */, U64_C(0xdf4874058ac7b638) /* 891 */,
+ U64_C(0xba92b29c678aa14a) /* 892 */, U64_C(0x0ce89fc76cfaadcd) /* 893 */,
+ U64_C(0x5f9d4e0908339e34) /* 894 */, U64_C(0xf1afe9291f5923b9) /* 895 */,
+ U64_C(0x6e3480f60f4a265f) /* 896 */, U64_C(0xeebf3a2ab29b841c) /* 897 */,
+ U64_C(0xe21938a88f91b4ad) /* 898 */, U64_C(0x57dfeff845c6d3c3) /* 899 */,
+ U64_C(0x2f006b0bf62caaf2) /* 900 */, U64_C(0x62f479ef6f75ee78) /* 901 */,
+ U64_C(0x11a55ad41c8916a9) /* 902 */, U64_C(0xf229d29084fed453) /* 903 */,
+ U64_C(0x42f1c27b16b000e6) /* 904 */, U64_C(0x2b1f76749823c074) /* 905 */,
+ U64_C(0x4b76eca3c2745360) /* 906 */, U64_C(0x8c98f463b91691bd) /* 907 */,
+ U64_C(0x14bcc93cf1ade66a) /* 908 */, U64_C(0x8885213e6d458397) /* 909 */,
+ U64_C(0x8e177df0274d4711) /* 910 */, U64_C(0xb49b73b5503f2951) /* 911 */,
+ U64_C(0x10168168c3f96b6b) /* 912 */, U64_C(0x0e3d963b63cab0ae) /* 913 */,
+ U64_C(0x8dfc4b5655a1db14) /* 914 */, U64_C(0xf789f1356e14de5c) /* 915 */,
+ U64_C(0x683e68af4e51dac1) /* 916 */, U64_C(0xc9a84f9d8d4b0fd9) /* 917 */,
+ U64_C(0x3691e03f52a0f9d1) /* 918 */, U64_C(0x5ed86e46e1878e80) /* 919 */,
+ U64_C(0x3c711a0e99d07150) /* 920 */, U64_C(0x5a0865b20c4e9310) /* 921 */,
+ U64_C(0x56fbfc1fe4f0682e) /* 922 */, U64_C(0xea8d5de3105edf9b) /* 923 */,
+ U64_C(0x71abfdb12379187a) /* 924 */, U64_C(0x2eb99de1bee77b9c) /* 925 */,
+ U64_C(0x21ecc0ea33cf4523) /* 926 */, U64_C(0x59a4d7521805c7a1) /* 927 */,
+ U64_C(0x3896f5eb56ae7c72) /* 928 */, U64_C(0xaa638f3db18f75dc) /* 929 */,
+ U64_C(0x9f39358dabe9808e) /* 930 */, U64_C(0xb7defa91c00b72ac) /* 931 */,
+ U64_C(0x6b5541fd62492d92) /* 932 */, U64_C(0x6dc6dee8f92e4d5b) /* 933 */,
+ U64_C(0x353f57abc4beea7e) /* 934 */, U64_C(0x735769d6da5690ce) /* 935 */,
+ U64_C(0x0a234aa642391484) /* 936 */, U64_C(0xf6f9508028f80d9d) /* 937 */,
+ U64_C(0xb8e319a27ab3f215) /* 938 */, U64_C(0x31ad9c1151341a4d) /* 939 */,
+ U64_C(0x773c22a57bef5805) /* 940 */, U64_C(0x45c7561a07968633) /* 941 */,
+ U64_C(0xf913da9e249dbe36) /* 942 */, U64_C(0xda652d9b78a64c68) /* 943 */,
+ U64_C(0x4c27a97f3bc334ef) /* 944 */, U64_C(0x76621220e66b17f4) /* 945 */,
+ U64_C(0x967743899acd7d0b) /* 946 */, U64_C(0xf3ee5bcae0ed6782) /* 947 */,
+ U64_C(0x409f753600c879fc) /* 948 */, U64_C(0x06d09a39b5926db6) /* 949 */,
+ U64_C(0x6f83aeb0317ac588) /* 950 */, U64_C(0x01e6ca4a86381f21) /* 951 */,
+ U64_C(0x66ff3462d19f3025) /* 952 */, U64_C(0x72207c24ddfd3bfb) /* 953 */,
+ U64_C(0x4af6b6d3e2ece2eb) /* 954 */, U64_C(0x9c994dbec7ea08de) /* 955 */,
+ U64_C(0x49ace597b09a8bc4) /* 956 */, U64_C(0xb38c4766cf0797ba) /* 957 */,
+ U64_C(0x131b9373c57c2a75) /* 958 */, U64_C(0xb1822cce61931e58) /* 959 */,
+ U64_C(0x9d7555b909ba1c0c) /* 960 */, U64_C(0x127fafdd937d11d2) /* 961 */,
+ U64_C(0x29da3badc66d92e4) /* 962 */, U64_C(0xa2c1d57154c2ecbc) /* 963 */,
+ U64_C(0x58c5134d82f6fe24) /* 964 */, U64_C(0x1c3ae3515b62274f) /* 965 */,
+ U64_C(0xe907c82e01cb8126) /* 966 */, U64_C(0xf8ed091913e37fcb) /* 967 */,
+ U64_C(0x3249d8f9c80046c9) /* 968 */, U64_C(0x80cf9bede388fb63) /* 969 */,
+ U64_C(0x1881539a116cf19e) /* 970 */, U64_C(0x5103f3f76bd52457) /* 971 */,
+ U64_C(0x15b7e6f5ae47f7a8) /* 972 */, U64_C(0xdbd7c6ded47e9ccf) /* 973 */,
+ U64_C(0x44e55c410228bb1a) /* 974 */, U64_C(0xb647d4255edb4e99) /* 975 */,
+ U64_C(0x5d11882bb8aafc30) /* 976 */, U64_C(0xf5098bbb29d3212a) /* 977 */,
+ U64_C(0x8fb5ea14e90296b3) /* 978 */, U64_C(0x677b942157dd025a) /* 979 */,
+ U64_C(0xfb58e7c0a390acb5) /* 980 */, U64_C(0x89d3674c83bd4a01) /* 981 */,
+ U64_C(0x9e2da4df4bf3b93b) /* 982 */, U64_C(0xfcc41e328cab4829) /* 983 */,
+ U64_C(0x03f38c96ba582c52) /* 984 */, U64_C(0xcad1bdbd7fd85db2) /* 985 */,
+ U64_C(0xbbb442c16082ae83) /* 986 */, U64_C(0xb95fe86ba5da9ab0) /* 987 */,
+ U64_C(0xb22e04673771a93f) /* 988 */, U64_C(0x845358c9493152d8) /* 989 */,
+ U64_C(0xbe2a488697b4541e) /* 990 */, U64_C(0x95a2dc2dd38e6966) /* 991 */,
+ U64_C(0xc02c11ac923c852b) /* 992 */, U64_C(0x2388b1990df2a87b) /* 993 */,
+ U64_C(0x7c8008fa1b4f37be) /* 994 */, U64_C(0x1f70d0c84d54e503) /* 995 */,
+ U64_C(0x5490adec7ece57d4) /* 996 */, U64_C(0x002b3c27d9063a3a) /* 997 */,
+ U64_C(0x7eaea3848030a2bf) /* 998 */, U64_C(0xc602326ded2003c0) /* 999 */,
+ U64_C(0x83a7287d69a94086) /* 1000 */, U64_C(0xc57a5fcb30f57a8a) /* 1001 */,
+ U64_C(0xb56844e479ebe779) /* 1002 */, U64_C(0xa373b40f05dcbce9) /* 1003 */,
+ U64_C(0xd71a786e88570ee2) /* 1004 */, U64_C(0x879cbacdbde8f6a0) /* 1005 */,
+ U64_C(0x976ad1bcc164a32f) /* 1006 */, U64_C(0xab21e25e9666d78b) /* 1007 */,
+ U64_C(0x901063aae5e5c33c) /* 1008 */, U64_C(0x9818b34448698d90) /* 1009 */,
+ U64_C(0xe36487ae3e1e8abb) /* 1010 */, U64_C(0xafbdf931893bdcb4) /* 1011 */,
+ U64_C(0x6345a0dc5fbbd519) /* 1012 */, U64_C(0x8628fe269b9465ca) /* 1013 */,
+ U64_C(0x1e5d01603f9c51ec) /* 1014 */, U64_C(0x4de44006a15049b7) /* 1015 */,
+ U64_C(0xbf6c70e5f776cbb1) /* 1016 */, U64_C(0x411218f2ef552bed) /* 1017 */,
+ U64_C(0xcb0c0708705a36a3) /* 1018 */, U64_C(0xe74d14754f986044) /* 1019 */,
+ U64_C(0xcd56d9430ea8280e) /* 1020 */, U64_C(0xc12591d7535f5065) /* 1021 */,
+ U64_C(0xc83223f1720aef96) /* 1022 */, U64_C(0xc3a0396f7363a51f) /* 1023 */
+};
+
+static unsigned int
+transform ( void *ctx, const unsigned char *data, size_t nblks );
+
+static void
+do_init (void *context, int variant)
+{
+ TIGER_CONTEXT *hd = context;
+
+ hd->a = 0x0123456789abcdefLL;
+ hd->b = 0xfedcba9876543210LL;
+ hd->c = 0xf096a5b4c3b2e187LL;
+
+ hd->bctx.nblocks = 0;
+ hd->bctx.nblocks_high = 0;
+ hd->bctx.count = 0;
+ hd->bctx.blocksize_shift = _gcry_ctz(64);
+ hd->bctx.bwrite = transform;
+ hd->variant = variant;
+}
+
+static void
+tiger_init (void *context, unsigned int flags)
+{
+ (void)flags;
+
+ do_init (context, 0);
+}
+
+static void
+tiger1_init (void *context, unsigned int flags)
+{
+ (void)flags;
+
+ do_init (context, 1);
+}
+
+static void
+tiger2_init (void *context, unsigned int flags)
+{
+ (void)flags;
+
+ do_init (context, 2);
+}
+
+
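+/* One Tiger round: XOR the message word into c, subtract from a the
+ * S-box lookups indexed by the even bytes of c, add to b the lookups
+ * indexed by the odd bytes, and finally multiply b by the pass
+ * constant (5, 7 or 9). */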
+#define tiger_round(xa, xb, xc, xx, xmul) { \
+ xc ^= xx; \
+ xa -= ( sbox1[ (xc) & 0xff ] ^ sbox2[ ((xc) >> 16) & 0xff ] \
+ ^ sbox3[ ((xc) >> 32) & 0xff ] ^ sbox4[ ((xc) >> 48) & 0xff ]); \
+ xb += ( sbox4[ ((xc) >> 8) & 0xff ] ^ sbox3[ ((xc) >> 24) & 0xff ] \
+ ^ sbox2[ ((xc) >> 40) & 0xff ] ^ sbox1[ ((xc) >> 56) & 0xff ]); \
+ xb *= xmul; }
+
+
+#define pass(ya, yb, yc, yx, ymul) { \
+ tiger_round( ya, yb, yc, yx[0], ymul ); \
+ tiger_round( yb, yc, ya, yx[1], ymul ); \
+ tiger_round( yc, ya, yb, yx[2], ymul ); \
+ tiger_round( ya, yb, yc, yx[3], ymul ); \
+ tiger_round( yb, yc, ya, yx[4], ymul ); \
+ tiger_round( yc, ya, yb, yx[5], ymul ); \
+ tiger_round( ya, yb, yc, yx[6], ymul ); \
+ tiger_round( yb, yc, ya, yx[7], ymul ); }
+
+
+#define key_schedule(x) { \
+ x[0] -= x[7] ^ 0xa5a5a5a5a5a5a5a5LL; \
+ x[1] ^= x[0]; \
+ x[2] += x[1]; \
+ x[3] -= x[2] ^ ((~x[1]) << 19 ); \
+ x[4] ^= x[3]; \
+ x[5] += x[4]; \
+ x[6] -= x[5] ^ ((~x[4]) >> 23 ); \
+ x[7] ^= x[6]; \
+ x[0] += x[7]; \
+ x[1] -= x[0] ^ ((~x[7]) << 19 ); \
+ x[2] ^= x[1]; \
+ x[3] += x[2]; \
+ x[4] -= x[3] ^ ((~x[2]) >> 23 ); \
+ x[5] ^= x[4]; \
+ x[6] += x[5]; \
+ x[7] -= x[6] ^ 0x0123456789abcdefLL; }
+
+
+/****************
+ * Transform the 64-byte message block DATA (eight 64-bit words).
+ */
+static unsigned int
+transform_blk ( void *ctx, const unsigned char *data )
+{
+ TIGER_CONTEXT *hd = ctx;
+ u64 a,b,c,aa,bb,cc;
+ u64 x[8];
+ int i;
+
+ for ( i = 0; i < 8; i++ )
+ x[i] = buf_get_le64(data + i * 8);
+
+ /* save */
+ a = aa = hd->a;
+ b = bb = hd->b;
+ c = cc = hd->c;
+
+ pass( a, b, c, x, 5);
+ key_schedule( x );
+ pass( c, a, b, x, 7);
+ key_schedule( x );
+ pass( b, c, a, x, 9);
+
+ /* feedforward */
+ a ^= aa;
+ b -= bb;
+ c += cc;
+ /* store */
+ hd->a = a;
+ hd->b = b;
+ hd->c = c;
+
+ return /*burn_stack*/ 21*8+11*sizeof(void*);
+}
+
+
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
+{
+ unsigned int burn;
+
+ do
+ {
+ burn = transform_blk (c, data);
+ data += 64;
+ }
+ while (--nblks);
+
+ return burn;
+}
+
+
+
+/* This routine finalizes the computation: it pads the message, appends
+ * the bit count and runs the last transform.
+ */
+static void
+tiger_final( void *context )
+{
+ TIGER_CONTEXT *hd = context;
+ u32 t, th, msb, lsb;
+ byte *p;
+ unsigned int burn;
+ byte pad = hd->variant == 2? 0x80 : 0x01;
+
+ t = hd->bctx.nblocks;
+ if (sizeof t == sizeof hd->bctx.nblocks)
+ th = hd->bctx.nblocks_high;
+ else
+ th = hd->bctx.nblocks >> 32;
+
+ /* multiply by 64 to make a byte count */
+ lsb = t << 6;
+ msb = (th << 6) | (t >> 26);
+ /* add the count */
+ t = lsb;
+ if( (lsb += hd->bctx.count) < t )
+ msb++;
+ /* multiply by 8 to make a bit count */
+ t = lsb;
+ lsb <<= 3;
+ msb <<= 3;
+ msb |= t >> 29;
+
+ if( hd->bctx.count < 56 ) /* enough room */
+ {
+ hd->bctx.buf[hd->bctx.count++] = pad;
+ if (hd->bctx.count < 56)
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+ /* append the 64 bit count */
+ buf_put_le32(hd->bctx.buf + 56, lsb);
+ buf_put_le32(hd->bctx.buf + 60, msb);
+ burn = transform( hd, hd->bctx.buf, 1 );
+ }
+ else /* need one extra block */
+ {
+ hd->bctx.buf[hd->bctx.count++] = pad; /* pad character */
+ /* fill pad and next block with zeroes */
+ memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+ /* append the 64 bit count */
+ buf_put_le32(hd->bctx.buf + 64 + 56, lsb);
+ buf_put_le32(hd->bctx.buf + 64 + 60, msb);
+ burn = transform( hd, hd->bctx.buf, 2 );
+ }
+
+ p = hd->bctx.buf;
+#define X(a) do { buf_put_be64(p, hd->a); p += 8; } while(0)
+#define Y(a) do { buf_put_le64(p, hd->a); p += 8; } while(0)
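+ /* The old variant stores the state words big-endian (as used by GnuPG
+    up to 1.3.2); TIGER1 and TIGER2 store them little-endian. */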
+ if (hd->variant == 0)
+ {
+ X(a);
+ X(b);
+ X(c);
+ }
+ else
+ {
+ Y(a);
+ Y(b);
+ Y(c);
+ }
+#undef X
+#undef Y
+
+ hd->bctx.count = 0;
+
+ _gcry_burn_stack (burn);
+}
+
+static byte *
+tiger_read( void *context )
+{
+ TIGER_CONTEXT *hd = context;
+
+ return hd->bctx.buf;
+}
+
+
+
+/* This is the old TIGER variant based on the unfixed reference
+ implementation. It was used in GnuPG up to 1.3.2. We don't provide
+ an OID anymore because that would not be correct. */
+gcry_md_spec_t _gcry_digest_spec_tiger =
+ {
+ GCRY_MD_TIGER, {0, 0},
+ "TIGER192", NULL, 0, NULL, 24,
+ tiger_init, _gcry_md_block_write, tiger_final, tiger_read, NULL,
+ NULL, NULL,
+ sizeof (TIGER_CONTEXT)
+ };
+
+
+
+/* This is the fixed TIGER implementation. */
+static byte asn1[19] = /* Object ID is 1.3.6.1.4.1.11591.12.2 */
+ { 0x30, 0x29, 0x30, 0x0d, 0x06, 0x09, 0x2b, 0x06,
+ 0x01, 0x04, 0x01, 0xda, 0x47, 0x0c, 0x02,
+ 0x05, 0x00, 0x04, 0x18 };
+
+static gcry_md_oid_spec_t oid_spec_tiger1[] =
+ {
+ /* GNU.digestAlgorithm TIGER */
+ { "1.3.6.1.4.1.11591.12.2" },
+ { NULL }
+ };
+
+gcry_md_spec_t _gcry_digest_spec_tiger1 =
+ {
+ GCRY_MD_TIGER1, {0, 0},
+ "TIGER", asn1, DIM (asn1), oid_spec_tiger1, 24,
+ tiger1_init, _gcry_md_block_write, tiger_final, tiger_read, NULL,
+ NULL, NULL,
+ sizeof (TIGER_CONTEXT)
+ };
+
+
+
+/* This is TIGER2, which uses a different padding algorithm. */
+gcry_md_spec_t _gcry_digest_spec_tiger2 =
+ {
+ GCRY_MD_TIGER2, {0, 0},
+ "TIGER2", NULL, 0, NULL, 24,
+ tiger2_init, _gcry_md_block_write, tiger_final, tiger_read, NULL,
+ NULL, NULL,
+ sizeof (TIGER_CONTEXT)
+ };
diff --git a/comm/third_party/libgcrypt/cipher/twofish-aarch64.S b/comm/third_party/libgcrypt/cipher/twofish-aarch64.S
new file mode 100644
index 0000000000..9f35b5cdeb
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/twofish-aarch64.S
@@ -0,0 +1,321 @@
+/* twofish-aarch64.S - ARMv8/AArch64 assembly implementation of Twofish cipher
+ *
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__)
+#ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+
+.text
+
+/* structure of TWOFISH_context: */
+#define s0 0
+#define s1 ((s0) + 4 * 256)
+#define s2 ((s1) + 4 * 256)
+#define s3 ((s2) + 4 * 256)
+#define w ((s3) + 4 * 256)
+#define k ((w) + 4 * 8)
+
+/* register macros */
+#define CTX x0
+#define RDST x1
+#define RSRC x2
+#define CTXs0 CTX
+#define CTXs1 x3
+#define CTXs2 x4
+#define CTXs3 x5
+#define CTXw x17
+
+#define RA w6
+#define RB w7
+#define RC w8
+#define RD w9
+
+#define RX w10
+#define RY w11
+
+#define xRX x10
+#define xRY x11
+
+#define RMASK w12
+
+#define RT0 w13
+#define RT1 w14
+#define RT2 w15
+#define RT3 w16
+
+#define xRT0 x13
+#define xRT1 x14
+#define xRT2 x15
+#define xRT3 x16
+
+/* helper macros */
+#ifndef __AARCH64EL__
+ /* bswap on big-endian */
+ #define host_to_le(reg) \
+ rev reg, reg;
+ #define le_to_host(reg) \
+ rev reg, reg;
+#else
+ /* nop on little-endian */
+ #define host_to_le(reg) /*_*/
+ #define le_to_host(reg) /*_*/
+#endif
+
+#define ldr_input_aligned_le(rin, a, b, c, d) \
+ ldr a, [rin, #0]; \
+ ldr b, [rin, #4]; \
+ le_to_host(a); \
+ ldr c, [rin, #8]; \
+ le_to_host(b); \
+ ldr d, [rin, #12]; \
+ le_to_host(c); \
+ le_to_host(d);
+
+#define str_output_aligned_le(rout, a, b, c, d) \
+ le_to_host(a); \
+ le_to_host(b); \
+ str a, [rout, #0]; \
+ le_to_host(c); \
+ str b, [rout, #4]; \
+ le_to_host(d); \
+ str c, [rout, #8]; \
+ str d, [rout, #12];
+
+/* unaligned word reads/writes allowed */
+#define ldr_input_le(rin, ra, rb, rc, rd, rtmp) \
+ ldr_input_aligned_le(rin, ra, rb, rc, rd)
+
+#define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+ str_output_aligned_le(rout, ra, rb, rc, rd)
+
+/**********************************************************************
+ 1-way twofish
+ **********************************************************************/
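+/* encrypt_round computes the two g() S-box lookups of the round,
+ * combines them with the pseudo-Hadamard transform (X + Y and X + 2Y),
+ * adds the two round subkeys from k, and mixes the results into the
+ * other half of the state with the 1-bit rotations of the Twofish
+ * round function. */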
+#define encrypt_round(a, b, rc, rd, n, ror_a, adj_a) \
+ and RT0, RMASK, b, lsr#(8 - 2); \
+ and RY, RMASK, b, lsr#(16 - 2); \
+ and RT1, RMASK, b, lsr#(24 - 2); \
+ ldr RY, [CTXs3, xRY]; \
+ and RT2, RMASK, b, lsl#(2); \
+ ldr RT0, [CTXs2, xRT0]; \
+ and RT3, RMASK, a, lsr#(16 - 2 + (adj_a)); \
+ ldr RT1, [CTXs0, xRT1]; \
+ and RX, RMASK, a, lsr#(8 - 2 + (adj_a)); \
+ ldr RT2, [CTXs1, xRT2]; \
+ ldr RX, [CTXs1, xRX]; \
+ ror_a(a); \
+ \
+ eor RY, RY, RT0; \
+ ldr RT3, [CTXs2, xRT3]; \
+ and RT0, RMASK, a, lsl#(2); \
+ eor RY, RY, RT1; \
+ and RT1, RMASK, a, lsr#(24 - 2); \
+ eor RY, RY, RT2; \
+ ldr RT0, [CTXs0, xRT0]; \
+ eor RX, RX, RT3; \
+ ldr RT1, [CTXs3, xRT1]; \
+ eor RX, RX, RT0; \
+ \
+ ldr RT3, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \
+ eor RX, RX, RT1; \
+ ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \
+ \
+ add RT0, RX, RY, lsl #1; \
+ add RX, RX, RY; \
+ add RT0, RT0, RT3; \
+ add RX, RX, RT2; \
+ eor rd, RT0, rd, ror #31; \
+ eor rc, rc, RX;
+
+#define dummy(x) /*_*/
+
+#define ror1(r) \
+ ror r, r, #1;
+
+#define decrypt_round(a, b, rc, rd, n, ror_b, adj_b) \
+ and RT3, RMASK, b, lsl#(2 - (adj_b)); \
+ and RT1, RMASK, b, lsr#(8 - 2 + (adj_b)); \
+ ror_b(b); \
+ and RT2, RMASK, a, lsl#(2); \
+ and RT0, RMASK, a, lsr#(8 - 2); \
+ \
+ ldr RY, [CTXs1, xRT3]; \
+ ldr RX, [CTXs0, xRT2]; \
+ and RT3, RMASK, b, lsr#(16 - 2); \
+ ldr RT1, [CTXs2, xRT1]; \
+ and RT2, RMASK, a, lsr#(16 - 2); \
+ ldr RT0, [CTXs1, xRT0]; \
+ \
+ ldr RT3, [CTXs3, xRT3]; \
+ eor RY, RY, RT1; \
+ \
+ and RT1, RMASK, b, lsr#(24 - 2); \
+ eor RX, RX, RT0; \
+ ldr RT2, [CTXs2, xRT2]; \
+ and RT0, RMASK, a, lsr#(24 - 2); \
+ \
+ ldr RT1, [CTXs0, xRT1]; \
+ \
+ eor RY, RY, RT3; \
+ ldr RT0, [CTXs3, xRT0]; \
+ eor RX, RX, RT2; \
+ eor RY, RY, RT1; \
+ \
+ ldr RT1, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \
+ eor RX, RX, RT0; \
+ ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \
+ \
+ add RT0, RX, RY, lsl #1; \
+ add RX, RX, RY; \
+ add RT0, RT0, RT1; \
+ add RX, RX, RT2; \
+ eor rd, rd, RT0; \
+ eor rc, RX, rc, ror #31;
+
+#define first_encrypt_cycle(nc) \
+ encrypt_round(RA, RB, RC, RD, (nc) * 2, dummy, 0); \
+ encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1);
+
+#define encrypt_cycle(nc) \
+ encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
+ encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1);
+
+#define last_encrypt_cycle(nc) \
+ encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
+ encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
+ ror1(RA);
+
+#define first_decrypt_cycle(nc) \
+ decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, dummy, 0); \
+ decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1);
+
+#define decrypt_cycle(nc) \
+ decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
+ decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1);
+
+#define last_decrypt_cycle(nc) \
+ decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
+ decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
+ ror1(RD);
+
+.globl _gcry_twofish_arm_encrypt_block
+ELF(.type _gcry_twofish_arm_encrypt_block,%function;)
+
+_gcry_twofish_arm_encrypt_block:
+ /* input:
+ * x0: ctx
+ * x1: dst
+ * x2: src
+ */
+ CFI_STARTPROC();
+
+ add CTXw, CTX, #(w);
+
+ ldr_input_le(RSRC, RA, RB, RC, RD, RT0);
+
+ /* Input whitening */
+ ldp RT0, RT1, [CTXw, #(0*8)];
+ ldp RT2, RT3, [CTXw, #(1*8)];
+ add CTXs3, CTX, #(s3);
+ add CTXs2, CTX, #(s2);
+ add CTXs1, CTX, #(s1);
+ mov RMASK, #(0xff << 2);
+ eor RA, RA, RT0;
+ eor RB, RB, RT1;
+ eor RC, RC, RT2;
+ eor RD, RD, RT3;
+
+ first_encrypt_cycle(0);
+ encrypt_cycle(1);
+ encrypt_cycle(2);
+ encrypt_cycle(3);
+ encrypt_cycle(4);
+ encrypt_cycle(5);
+ encrypt_cycle(6);
+ last_encrypt_cycle(7);
+
+ /* Output whitening */
+ ldp RT0, RT1, [CTXw, #(2*8)];
+ ldp RT2, RT3, [CTXw, #(3*8)];
+ eor RC, RC, RT0;
+ eor RD, RD, RT1;
+ eor RA, RA, RT2;
+ eor RB, RB, RT3;
+
+ str_output_le(RDST, RC, RD, RA, RB, RT0, RT1);
+
+ ret;
+ CFI_ENDPROC();
+.ltorg
+ELF(.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;)
+
+.globl _gcry_twofish_arm_decrypt_block
+ELF(.type _gcry_twofish_arm_decrypt_block,%function;)
+
+_gcry_twofish_arm_decrypt_block:
+ /* input:
+ * x0: ctx
+ * x1: dst
+ * x2: src
+ */
+ CFI_STARTPROC();
+
+ add CTXw, CTX, #(w);
+
+ ldr_input_le(RSRC, RC, RD, RA, RB, RT0);
+
+ /* Input whitening */
+ ldp RT0, RT1, [CTXw, #(2*8)];
+ ldp RT2, RT3, [CTXw, #(3*8)];
+ add CTXs3, CTX, #(s3);
+ add CTXs2, CTX, #(s2);
+ add CTXs1, CTX, #(s1);
+ mov RMASK, #(0xff << 2);
+ eor RC, RC, RT0;
+ eor RD, RD, RT1;
+ eor RA, RA, RT2;
+ eor RB, RB, RT3;
+
+ first_decrypt_cycle(7);
+ decrypt_cycle(6);
+ decrypt_cycle(5);
+ decrypt_cycle(4);
+ decrypt_cycle(3);
+ decrypt_cycle(2);
+ decrypt_cycle(1);
+ last_decrypt_cycle(0);
+
+ /* Output whitening */
+ ldp RT0, RT1, [CTXw, #(0*8)];
+ ldp RT2, RT3, [CTXw, #(1*8)];
+ eor RA, RA, RT0;
+ eor RB, RB, RT1;
+ eor RC, RC, RT2;
+ eor RD, RD, RT3;
+
+ str_output_le(RDST, RA, RB, RC, RD, RT0, RT1);
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;)
+
+#endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/
+#endif /*__AARCH64EL__*/
diff --git a/comm/third_party/libgcrypt/cipher/twofish-amd64.S b/comm/third_party/libgcrypt/cipher/twofish-amd64.S
new file mode 100644
index 0000000000..3cb734317d
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/twofish-amd64.S
@@ -0,0 +1,1184 @@
+/* twofish-amd64.S - AMD64 assembly implementation of Twofish cipher
+ *
+ * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH)
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* structure of TWOFISH_context: */
+#define s0 0
+#define s1 ((s0) + 4 * 256)
+#define s2 ((s1) + 4 * 256)
+#define s3 ((s2) + 4 * 256)
+#define w ((s3) + 4 * 256)
+#define k ((w) + 4 * 8)
+
+/* register macros */
+#define CTX %rdi
+
+#define RA %rax
+#define RB %rbx
+#define RC %rcx
+#define RD %rdx
+
+#define RAd %eax
+#define RBd %ebx
+#define RCd %ecx
+#define RDd %edx
+
+#define RAbl %al
+#define RBbl %bl
+#define RCbl %cl
+#define RDbl %dl
+
+#define RAbh %ah
+#define RBbh %bh
+#define RCbh %ch
+#define RDbh %dh
+
+#define RX %r8
+#define RY %r9
+
+#define RXd %r8d
+#define RYd %r9d
+
+#define RT0 %rsi
+#define RT1 %rbp
+#define RT2 %r10
+#define RT3 %r11
+
+#define RT0d %esi
+#define RT1d %ebp
+#define RT2d %r10d
+#define RT3d %r11d
+
+/***********************************************************************
+ * AMD64 assembly implementation of the Twofish cipher
+ ***********************************************************************/
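+/* enc_g1_2/dec_g1_2 perform the g() S-box lookups for both state words
+ * of a round via byte extraction and 16-bit rotates; encrypt_round and
+ * decrypt_round then apply the pseudo-Hadamard transform, add the round
+ * subkeys from k, and mix the results into the other two words with
+ * 1-bit rotates. */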
+#define enc_g1_2(a, b, x, y) \
+ movzbl b ## bl, RT3d; \
+ movzbl b ## bh, RT1d; \
+ movzbl a ## bl, RT2d; \
+ movzbl a ## bh, RT0d; \
+ rorl $16, b ## d; \
+ rorl $16, a ## d; \
+ movl s1(CTX, RT3, 4), RYd; \
+ movzbl b ## bl, RT3d; \
+ movl s0(CTX, RT2, 4), RXd; \
+ movzbl a ## bl, RT2d; \
+ xorl s2(CTX, RT1, 4), RYd; \
+ movzbl b ## bh, RT1d; \
+ xorl s1(CTX, RT0, 4), RXd; \
+ movzbl a ## bh, RT0d; \
+ rorl $16, b ## d; \
+ rorl $16, a ## d; \
+ xorl s3(CTX, RT3, 4), RYd; \
+ xorl s2(CTX, RT2, 4), RXd; \
+ xorl s0(CTX, RT1, 4), RYd; \
+ xorl s3(CTX, RT0, 4), RXd;
+
+#define dec_g1_2(a, b, x, y) \
+ movzbl a ## bl, RT2d; \
+ movzbl a ## bh, RT0d; \
+ movzbl b ## bl, RT3d; \
+ movzbl b ## bh, RT1d; \
+ rorl $16, a ## d; \
+ rorl $16, b ## d; \
+ movl s0(CTX, RT2, 4), RXd; \
+ movzbl a ## bl, RT2d; \
+ movl s1(CTX, RT3, 4), RYd; \
+ movzbl b ## bl, RT3d; \
+ xorl s1(CTX, RT0, 4), RXd; \
+ movzbl a ## bh, RT0d; \
+ xorl s2(CTX, RT1, 4), RYd; \
+ movzbl b ## bh, RT1d; \
+ rorl $16, a ## d; \
+ rorl $16, b ## d; \
+ xorl s2(CTX, RT2, 4), RXd; \
+ xorl s3(CTX, RT3, 4), RYd; \
+ xorl s3(CTX, RT0, 4), RXd; \
+ xorl s0(CTX, RT1, 4), RYd;
+
+#define encrypt_round(ra, rb, rc, rd, n) \
+ enc_g1_2(##ra, ##rb, RX, RY); \
+ \
+ leal (RXd, RYd, 2), RT0d; \
+ addl RYd, RXd; \
+ addl (k + 8 * (n) + 4)(CTX), RT0d; \
+ roll $1, rd ## d; \
+ addl (k + 8 * (n))(CTX), RXd; \
+ xorl RT0d, rd ## d; \
+ xorl RXd, rc ## d; \
+ rorl $1, rc ## d;
+
+#define decrypt_round(ra, rb, rc, rd, n) \
+ dec_g1_2(##ra, ##rb, RX, RY); \
+ \
+ leal (RXd, RYd, 2), RT0d; \
+ addl RYd, RXd; \
+ addl (k + 8 * (n) + 4)(CTX), RT0d; \
+ roll $1, rc ## d; \
+ addl (k + 8 * (n))(CTX), RXd; \
+ xorl RXd, rc ## d; \
+ xorl RT0d, rd ## d; \
+ rorl $1, rd ## d;
+
+#define encrypt_cycle(a, b, c, d, nc) \
+ encrypt_round(##a, ##b, ##c, ##d, (nc) * 2); \
+ encrypt_round(##c, ##d, ##a, ##b, (nc) * 2 + 1);
+
+#define decrypt_cycle(a, b, c, d, nc) \
+ decrypt_round(##c, ##d, ##a, ##b, (nc) * 2 + 1); \
+ decrypt_round(##a, ##b, ##c, ##d, (nc) * 2);
+
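+/* inpack/outunpack load or store one 32-bit word of the block and XOR it
+ * with the corresponding whitening subkey from w. */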
+#define inpack(in, n, x, m) \
+ movl (4 * (n))(in), x; \
+ xorl (w + 4 * (m))(CTX), x;
+
+#define outunpack(out, n, x, m) \
+ xorl (w + 4 * (m))(CTX), x; \
+ movl x, (4 * (n))(out);
+
+.align 8
+.globl _gcry_twofish_amd64_encrypt_block
+ELF(.type _gcry_twofish_amd64_encrypt_block,@function;)
+
+_gcry_twofish_amd64_encrypt_block:
+ /* input:
+ * %rdi: context, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ subq $(3 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(3 * 8);
+ movq %rsi, (0 * 8)(%rsp);
+ movq %rbp, (1 * 8)(%rsp);
+ movq %rbx, (2 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 1 * 8);
+ CFI_REL_OFFSET(%rbx, 2 * 8);
+
+ movq %rdx, RX;
+ inpack(RX, 0, RAd, 0);
+ inpack(RX, 1, RBd, 1);
+ inpack(RX, 2, RCd, 2);
+ inpack(RX, 3, RDd, 3);
+
+ encrypt_cycle(RA, RB, RC, RD, 0);
+ encrypt_cycle(RA, RB, RC, RD, 1);
+ encrypt_cycle(RA, RB, RC, RD, 2);
+ encrypt_cycle(RA, RB, RC, RD, 3);
+ encrypt_cycle(RA, RB, RC, RD, 4);
+ encrypt_cycle(RA, RB, RC, RD, 5);
+ encrypt_cycle(RA, RB, RC, RD, 6);
+ encrypt_cycle(RA, RB, RC, RD, 7);
+
+ movq (0 * 8)(%rsp), RX; /*dst*/
+ outunpack(RX, 0, RCd, 4);
+ outunpack(RX, 1, RDd, 5);
+ outunpack(RX, 2, RAd, 6);
+ outunpack(RX, 3, RBd, 7);
+
+ movq (2 * 8)(%rsp), %rbx;
+ movq (1 * 8)(%rsp), %rbp;
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%rbp);
+ addq $(3 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-3 * 8);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
+
+.align 8
+.globl _gcry_twofish_amd64_decrypt_block
+ELF(.type _gcry_twofish_amd64_decrypt_block,@function;)
+
+_gcry_twofish_amd64_decrypt_block:
+ /* input:
+ * %rdi: context, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ subq $(3 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(3 * 8);
+ movq %rsi, (0 * 8)(%rsp);
+ movq %rbp, (1 * 8)(%rsp);
+ movq %rbx, (2 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 1 * 8);
+ CFI_REL_OFFSET(%rbx, 2 * 8);
+
+ movq %rdx, RX;
+ inpack(RX, 0, RCd, 4);
+ inpack(RX, 1, RDd, 5);
+ inpack(RX, 2, RAd, 6);
+ inpack(RX, 3, RBd, 7);
+
+ decrypt_cycle(RA, RB, RC, RD, 7);
+ decrypt_cycle(RA, RB, RC, RD, 6);
+ decrypt_cycle(RA, RB, RC, RD, 5);
+ decrypt_cycle(RA, RB, RC, RD, 4);
+ decrypt_cycle(RA, RB, RC, RD, 3);
+ decrypt_cycle(RA, RB, RC, RD, 2);
+ decrypt_cycle(RA, RB, RC, RD, 1);
+ decrypt_cycle(RA, RB, RC, RD, 0);
+
+ movq (0 * 8)(%rsp), RX; /*dst*/
+ outunpack(RX, 0, RAd, 0);
+ outunpack(RX, 1, RBd, 1);
+ outunpack(RX, 2, RCd, 2);
+ outunpack(RX, 3, RDd, 3);
+
+ movq (2 * 8)(%rsp), %rbx;
+ movq (1 * 8)(%rsp), %rbp;
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%rbp);
+ addq $(3 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-3 * 8);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_decrypt_block,.-_gcry_twofish_amd64_decrypt_block;)
+
+#undef CTX
+
+#undef RA
+#undef RB
+#undef RC
+#undef RD
+
+#undef RAd
+#undef RBd
+#undef RCd
+#undef RDd
+
+#undef RAbl
+#undef RBbl
+#undef RCbl
+#undef RDbl
+
+#undef RAbh
+#undef RBbh
+#undef RCbh
+#undef RDbh
+
+#undef RX
+#undef RY
+
+#undef RXd
+#undef RYd
+
+#undef RT0
+#undef RT1
+#undef RT2
+#undef RT3
+
+#undef RT0d
+#undef RT1d
+#undef RT2d
+#undef RT3d
+
+/***********************************************************************
+ * AMD64 assembly implementation of the Twofish cipher, 3-way parallel
+ ***********************************************************************/
+#define CTX %rdi
+#define RIO %rdx
+
+#define RAB0 %rax
+#define RAB1 %rbx
+#define RAB2 %rcx
+
+#define RAB0d %eax
+#define RAB1d %ebx
+#define RAB2d %ecx
+
+#define RAB0bh %ah
+#define RAB1bh %bh
+#define RAB2bh %ch
+
+#define RAB0bl %al
+#define RAB1bl %bl
+#define RAB2bl %cl
+
+#define RCD0 %r8
+#define RCD1 %r9
+#define RCD2 %r10
+
+#define RCD0d %r8d
+#define RCD1d %r9d
+#define RCD2d %r10d
+
+#define RX0 %rbp
+#define RX1 %r11
+#define RX2 %r12
+
+#define RX0d %ebp
+#define RX1d %r11d
+#define RX2d %r12d
+
+#define RY0 %r13
+#define RY1 %r14
+#define RY2 %r15
+
+#define RY0d %r13d
+#define RY1d %r14d
+#define RY2d %r15d
+
+#define RT0 %rdx
+#define RT1 %rsi
+
+#define RT0d %edx
+#define RT1d %esi
+
+#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
+ movzbl ab ## bl, tmp2 ## d; \
+ movzbl ab ## bh, tmp1 ## d; \
+ rorq $(rot), ab; \
+ op1##l T0(CTX, tmp2, 4), dst ## d; \
+ op2##l T1(CTX, tmp1, 4), dst ## d;
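+
+/*
+ * do16bit_ror: look up two s-box entries for the low two bytes of 'ab'
+ * (op1/op2 select mov vs. xor into 'dst'), then rotate 'ab' right by 'rot'
+ * bits so that the next invocation sees the following byte pair.
+ */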
+
+/*
+ * Combined G1 & G2 function. Reordered with the help of rotates so that
+ * the moves come at the beginning.
+ */
+#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \
+ /* G1,1 && G2,1 */ \
+ do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \
+ do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \
+ \
+ do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \
+ do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \
+ \
+ do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \
+ do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \
+ \
+ /* G1,2 && G2,2 */ \
+ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
+ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
+ movq ab ## 0, RT0; \
+ movq cd ## 0, ab ## 0; \
+ movq RT0, cd ## 0; \
+ \
+ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
+ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
+ movq ab ## 1, RT0; \
+ movq cd ## 1, ab ## 1; \
+ movq RT0, cd ## 1; \
+ \
+ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
+ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
+ movq ab ## 2, RT0; \
+ movq cd ## 2, ab ## 2; \
+ movq RT0, cd ## 2;
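+
+/*
+ * The trailing movq triplets swap the ab and cd registers of each block:
+ * the old c/d words end up in ab, where the *_round_end macros modify
+ * them, and the old a/b words move to cd for the next round.
+ */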
+
+#define enc_round_end(ab, x, y, n) \
+ addl y ## d, x ## d; \
+ addl x ## d, y ## d; \
+ addl k+4*(2*(n))(CTX), x ## d; \
+ xorl ab ## d, x ## d; \
+ addl k+4*(2*(n)+1)(CTX), y ## d; \
+ shrq $32, ab; \
+ roll $1, ab ## d; \
+ xorl y ## d, ab ## d; \
+ shlq $32, ab; \
+ rorl $1, x ## d; \
+ orq x, ab;
+
+#define dec_round_end(ba, x, y, n) \
+ addl y ## d, x ## d; \
+ addl x ## d, y ## d; \
+ addl k+4*(2*(n))(CTX), x ## d; \
+ addl k+4*(2*(n)+1)(CTX), y ## d; \
+ xorl ba ## d, y ## d; \
+ shrq $32, ba; \
+ roll $1, ba ## d; \
+ xorl x ## d, ba ## d; \
+ shlq $32, ba; \
+ rorl $1, y ## d; \
+ orq y, ba;
+
+#define encrypt_round3(ab, cd, n) \
+ g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \
+ \
+ enc_round_end(ab ## 0, RX0, RY0, n); \
+ enc_round_end(ab ## 1, RX1, RY1, n); \
+ enc_round_end(ab ## 2, RX2, RY2, n);
+
+#define decrypt_round3(ba, dc, n) \
+ g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \
+ \
+ dec_round_end(ba ## 0, RX0, RY0, n); \
+ dec_round_end(ba ## 1, RX1, RY1, n); \
+ dec_round_end(ba ## 2, RX2, RY2, n);
+
+#define encrypt_cycle3(ab, cd, n) \
+ encrypt_round3(ab, cd, n*2); \
+ encrypt_round3(ab, cd, (n*2)+1);
+
+#define decrypt_cycle3(ba, dc, n) \
+ decrypt_round3(ba, dc, (n*2)+1); \
+ decrypt_round3(ba, dc, (n*2));
+
+#define inpack3(xy, m) \
+ xorq w+4*m(CTX), xy ## 0; \
+ xorq w+4*m(CTX), xy ## 1; \
+ xorq w+4*m(CTX), xy ## 2;
+
+#define outunpack3(xy, m) \
+ xorq w+4*m(CTX), xy ## 0; \
+ xorq w+4*m(CTX), xy ## 1; \
+ xorq w+4*m(CTX), xy ## 2;
+
+#define inpack_enc3() \
+ inpack3(RAB, 0); \
+ inpack3(RCD, 2);
+
+#define outunpack_enc3() \
+ outunpack3(RAB, 6); \
+ outunpack3(RCD, 4);
+
+#define inpack_dec3() \
+ inpack3(RAB, 4); \
+ rorq $32, RAB0; \
+ rorq $32, RAB1; \
+ rorq $32, RAB2; \
+ inpack3(RCD, 6); \
+ rorq $32, RCD0; \
+ rorq $32, RCD1; \
+ rorq $32, RCD2;
+
+#define outunpack_dec3() \
+ rorq $32, RCD0; \
+ rorq $32, RCD1; \
+ rorq $32, RCD2; \
+ outunpack3(RCD, 0); \
+ rorq $32, RAB0; \
+ rorq $32, RAB1; \
+ rorq $32, RAB2; \
+ outunpack3(RAB, 2);
+
+.align 8
+ELF(.type __twofish_enc_blk3,@function;)
+
+__twofish_enc_blk3:
+ /* input:
+ * %rdi: ctx, CTX
+ * RAB0,RCD0,RAB1,RCD1,RAB2,RCD2: three plaintext blocks
+ * output:
+ * RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three ciphertext blocks
+ */
+ CFI_STARTPROC();
+
+ inpack_enc3();
+
+ encrypt_cycle3(RAB, RCD, 0);
+ encrypt_cycle3(RAB, RCD, 1);
+ encrypt_cycle3(RAB, RCD, 2);
+ encrypt_cycle3(RAB, RCD, 3);
+ encrypt_cycle3(RAB, RCD, 4);
+ encrypt_cycle3(RAB, RCD, 5);
+ encrypt_cycle3(RAB, RCD, 6);
+ encrypt_cycle3(RAB, RCD, 7);
+
+ outunpack_enc3();
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __twofish_enc_blk3,.-__twofish_enc_blk3;)
+
+.align 8
+ELF(.type __twofish_dec_blk3,@function;)
+
+__twofish_dec_blk3:
+ /* input:
+ * %rdi: ctx, CTX
+ * RAB0,RCD0,RAB1,RCD1,RAB2,RCD2: three ciphertext blocks
+ * output:
+ * RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three plaintext blocks
+ */
+ CFI_STARTPROC();
+
+ inpack_dec3();
+
+ decrypt_cycle3(RAB, RCD, 7);
+ decrypt_cycle3(RAB, RCD, 6);
+ decrypt_cycle3(RAB, RCD, 5);
+ decrypt_cycle3(RAB, RCD, 4);
+ decrypt_cycle3(RAB, RCD, 3);
+ decrypt_cycle3(RAB, RCD, 2);
+ decrypt_cycle3(RAB, RCD, 1);
+ decrypt_cycle3(RAB, RCD, 0);
+
+ outunpack_dec3();
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;)
+
+.align 8
+.globl _gcry_twofish_amd64_ctr_enc
+ELF(.type _gcry_twofish_amd64_ctr_enc,@function;)
+_gcry_twofish_amd64_ctr_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (3 blocks)
+ * %rdx: src (3 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ subq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(8 * 8);
+ movq %rbp, (0 * 8)(%rsp);
+ movq %rbx, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ movq %r14, (4 * 8)(%rsp);
+ movq %r15, (5 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 0 * 8);
+ CFI_REL_OFFSET(%rbx, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+ CFI_REL_OFFSET(%r14, 4 * 8);
+ CFI_REL_OFFSET(%r15, 5 * 8);
+
+ movq %rsi, (6 * 8)(%rsp);
+ movq %rdx, (7 * 8)(%rsp);
+ movq %rcx, RX0;
+
+ /* load IV and byteswap */
+ movq 8(RX0), RT0;
+ movq 0(RX0), RT1;
+ movq RT0, RCD0;
+ movq RT1, RAB0;
+ bswapq RT0;
+ bswapq RT1;
+
+ /* construct IVs */
+ movq RT0, RCD1;
+ movq RT1, RAB1;
+ movq RT0, RCD2;
+ movq RT1, RAB2;
+ addq $1, RCD1;
+ adcq $0, RAB1;
+ bswapq RCD1;
+ bswapq RAB1;
+ addq $2, RCD2;
+ adcq $0, RAB2;
+ bswapq RCD2;
+ bswapq RAB2;
+ addq $3, RT0;
+ adcq $0, RT1;
+ bswapq RT0;
+ bswapq RT1;
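+
+	/* RAB1:RCD1 and RAB2:RCD2 now hold counter+1 and counter+2 (back in
+	 * big-endian byte order); RT1:RT0 is counter+3 and is written out as
+	 * the new IV below. */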
+
+ /* store new IV */
+ movq RT0, 8(RX0);
+ movq RT1, 0(RX0);
+
+ call __twofish_enc_blk3;
+
+ movq (7 * 8)(%rsp), RX0; /*src*/
+ movq (6 * 8)(%rsp), RX1; /*dst*/
+
+ /* XOR key-stream with plaintext */
+ xorq (0 * 8)(RX0), RCD0;
+ xorq (1 * 8)(RX0), RAB0;
+ xorq (2 * 8)(RX0), RCD1;
+ xorq (3 * 8)(RX0), RAB1;
+ xorq (4 * 8)(RX0), RCD2;
+ xorq (5 * 8)(RX0), RAB2;
+ movq RCD0, (0 * 8)(RX1);
+ movq RAB0, (1 * 8)(RX1);
+ movq RCD1, (2 * 8)(RX1);
+ movq RAB1, (3 * 8)(RX1);
+ movq RCD2, (4 * 8)(RX1);
+ movq RAB2, (5 * 8)(RX1);
+
+ movq (0 * 8)(%rsp), %rbp;
+ movq (1 * 8)(%rsp), %rbx;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ movq (4 * 8)(%rsp), %r14;
+ movq (5 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
+ addq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-8 * 8);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;)
+
+.align 8
+.globl _gcry_twofish_amd64_cbc_dec
+ELF(.type _gcry_twofish_amd64_cbc_dec,@function;)
+_gcry_twofish_amd64_cbc_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (3 blocks)
+ * %rdx: src (3 blocks)
+ * %rcx: iv (128bit)
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ subq $(9 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(9 * 8);
+ movq %rbp, (0 * 8)(%rsp);
+ movq %rbx, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ movq %r14, (4 * 8)(%rsp);
+ movq %r15, (5 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 0 * 8);
+ CFI_REL_OFFSET(%rbx, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+ CFI_REL_OFFSET(%r14, 4 * 8);
+ CFI_REL_OFFSET(%r15, 5 * 8);
+
+ movq %rsi, (6 * 8)(%rsp);
+ movq %rdx, (7 * 8)(%rsp);
+ movq %rcx, (8 * 8)(%rsp);
+ movq %rdx, RX0;
+
+ /* load input */
+ movq (0 * 8)(RX0), RAB0;
+ movq (1 * 8)(RX0), RCD0;
+ movq (2 * 8)(RX0), RAB1;
+ movq (3 * 8)(RX0), RCD1;
+ movq (4 * 8)(RX0), RAB2;
+ movq (5 * 8)(RX0), RCD2;
+
+ call __twofish_dec_blk3;
+
+ movq (8 * 8)(%rsp), RT0; /*iv*/
+ movq (7 * 8)(%rsp), RX0; /*src*/
+ movq (6 * 8)(%rsp), RX1; /*dst*/
+
+ movq (4 * 8)(RX0), RY0;
+ movq (5 * 8)(RX0), RY1;
+ xorq (0 * 8)(RT0), RCD0;
+ xorq (1 * 8)(RT0), RAB0;
+ xorq (0 * 8)(RX0), RCD1;
+ xorq (1 * 8)(RX0), RAB1;
+ xorq (2 * 8)(RX0), RCD2;
+ xorq (3 * 8)(RX0), RAB2;
+ movq RY0, (0 * 8)(RT0);
+ movq RY1, (1 * 8)(RT0);
+
+ movq RCD0, (0 * 8)(RX1);
+ movq RAB0, (1 * 8)(RX1);
+ movq RCD1, (2 * 8)(RX1);
+ movq RAB1, (3 * 8)(RX1);
+ movq RCD2, (4 * 8)(RX1);
+ movq RAB2, (5 * 8)(RX1);
+
+ movq (0 * 8)(%rsp), %rbp;
+ movq (1 * 8)(%rsp), %rbx;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ movq (4 * 8)(%rsp), %r14;
+ movq (5 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
+ addq $(9 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-9 * 8);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;)
+
+.align 8
+.globl _gcry_twofish_amd64_cfb_dec
+ELF(.type _gcry_twofish_amd64_cfb_dec,@function;)
+_gcry_twofish_amd64_cfb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (3 blocks)
+ * %rdx: src (3 blocks)
+ * %rcx: iv (128bit)
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_0_4
+
+ subq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(8 * 8);
+ movq %rbp, (0 * 8)(%rsp);
+ movq %rbx, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ movq %r14, (4 * 8)(%rsp);
+ movq %r15, (5 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 0 * 8);
+ CFI_REL_OFFSET(%rbx, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+ CFI_REL_OFFSET(%r14, 4 * 8);
+ CFI_REL_OFFSET(%r15, 5 * 8);
+
+ movq %rsi, (6 * 8)(%rsp);
+ movq %rdx, (7 * 8)(%rsp);
+ movq %rdx, RX0;
+ movq %rcx, RX1;
+
+ /* load input */
+ movq (0 * 8)(RX1), RAB0;
+ movq (1 * 8)(RX1), RCD0;
+ movq (0 * 8)(RX0), RAB1;
+ movq (1 * 8)(RX0), RCD1;
+ movq (2 * 8)(RX0), RAB2;
+ movq (3 * 8)(RX0), RCD2;
+
+ /* Update IV */
+ movq (4 * 8)(RX0), RY0;
+ movq (5 * 8)(RX0), RY1;
+ movq RY0, (0 * 8)(RX1);
+ movq RY1, (1 * 8)(RX1);
+
+ call __twofish_enc_blk3;
+
+ movq (7 * 8)(%rsp), RX0; /*src*/
+ movq (6 * 8)(%rsp), RX1; /*dst*/
+
+ xorq (0 * 8)(RX0), RCD0;
+ xorq (1 * 8)(RX0), RAB0;
+ xorq (2 * 8)(RX0), RCD1;
+ xorq (3 * 8)(RX0), RAB1;
+ xorq (4 * 8)(RX0), RCD2;
+ xorq (5 * 8)(RX0), RAB2;
+ movq RCD0, (0 * 8)(RX1);
+ movq RAB0, (1 * 8)(RX1);
+ movq RCD1, (2 * 8)(RX1);
+ movq RAB1, (3 * 8)(RX1);
+ movq RCD2, (4 * 8)(RX1);
+ movq RAB2, (5 * 8)(RX1);
+
+ movq (0 * 8)(%rsp), %rbp;
+ movq (1 * 8)(%rsp), %rbx;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ movq (4 * 8)(%rsp), %r14;
+ movq (5 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
+ addq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-8 * 8);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;)
+
+.align 8
+.globl _gcry_twofish_amd64_ocb_enc
+ELF(.type _gcry_twofish_amd64_ocb_enc,@function;)
+_gcry_twofish_amd64_ocb_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (3 blocks)
+ * %rdx: src (3 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[3])
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_6
+
+ subq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(8 * 8);
+ movq %rbp, (0 * 8)(%rsp);
+ movq %rbx, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ movq %r14, (4 * 8)(%rsp);
+ movq %r15, (5 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 0 * 8);
+ CFI_REL_OFFSET(%rbx, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+ CFI_REL_OFFSET(%r14, 4 * 8);
+ CFI_REL_OFFSET(%r15, 5 * 8);
+
+ movq %rsi, (6 * 8)(%rsp);
+ movq %rdx, RX0;
+ movq %rcx, RX1;
+ movq %r8, RX2;
+ movq %r9, RY0;
+ movq %rsi, RY1;
+
+ /* Load offset */
+ movq (0 * 8)(RX1), RT0;
+ movq (1 * 8)(RX1), RT1;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ movq (RY0), RY2;
+ xorq (0 * 8)(RY2), RT0;
+ xorq (1 * 8)(RY2), RT1;
+ movq (0 * 8)(RX0), RAB0;
+ movq (1 * 8)(RX0), RCD0;
+ /* Store Offset_i */
+ movq RT0, (0 * 8)(RY1);
+ movq RT1, (1 * 8)(RY1);
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ xor RAB0, (0 * 8)(RX2);
+ xor RCD0, (1 * 8)(RX2);
+ /* PX_i = P_i xor Offset_i */
+ xorq RT0, RAB0;
+ xorq RT1, RCD0;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ movq 8(RY0), RY2;
+ xorq (0 * 8)(RY2), RT0;
+ xorq (1 * 8)(RY2), RT1;
+ movq (2 * 8)(RX0), RAB1;
+ movq (3 * 8)(RX0), RCD1;
+ /* Store Offset_i */
+ movq RT0, (2 * 8)(RY1);
+ movq RT1, (3 * 8)(RY1);
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ xor RAB1, (0 * 8)(RX2);
+ xor RCD1, (1 * 8)(RX2);
+ /* PX_i = P_i xor Offset_i */
+ xorq RT0, RAB1;
+ xorq RT1, RCD1;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ movq 16(RY0), RY2;
+ xorq (0 * 8)(RY2), RT0;
+ xorq (1 * 8)(RY2), RT1;
+ movq (4 * 8)(RX0), RAB2;
+ movq (5 * 8)(RX0), RCD2;
+ /* Store Offset_i */
+ movq RT0, (4 * 8)(RY1);
+ movq RT1, (5 * 8)(RY1);
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ xor RAB2, (0 * 8)(RX2);
+ xor RCD2, (1 * 8)(RX2);
+ /* PX_i = P_i xor Offset_i */
+ xorq RT0, RAB2;
+ xorq RT1, RCD2;
+
+ /* Store offset */
+ movq RT0, (0 * 8)(RX1);
+ movq RT1, (1 * 8)(RX1);
+
+ /* CX_i = ENCIPHER(K, PX_i) */
+ call __twofish_enc_blk3;
+
+ movq (6 * 8)(%rsp), RX1; /*dst*/
+
+ /* C_i = CX_i xor Offset_i */
+ xorq RCD0, (0 * 8)(RX1);
+ xorq RAB0, (1 * 8)(RX1);
+ xorq RCD1, (2 * 8)(RX1);
+ xorq RAB1, (3 * 8)(RX1);
+ xorq RCD2, (4 * 8)(RX1);
+ xorq RAB2, (5 * 8)(RX1);
+
+ movq (0 * 8)(%rsp), %rbp;
+ movq (1 * 8)(%rsp), %rbx;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ movq (4 * 8)(%rsp), %r14;
+ movq (5 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
+ addq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-8 * 8);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;)
+
+.align 8
+.globl _gcry_twofish_amd64_ocb_dec
+ELF(.type _gcry_twofish_amd64_ocb_dec,@function;)
+_gcry_twofish_amd64_ocb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (3 blocks)
+ * %rdx: src (3 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[3])
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_6
+
+ subq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(8 * 8);
+ movq %rbp, (0 * 8)(%rsp);
+ movq %rbx, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ movq %r14, (4 * 8)(%rsp);
+ movq %r15, (5 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 0 * 8);
+ CFI_REL_OFFSET(%rbx, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+ CFI_REL_OFFSET(%r14, 4 * 8);
+ CFI_REL_OFFSET(%r15, 5 * 8);
+
+ movq %rsi, (6 * 8)(%rsp);
+ movq %r8, (7 * 8)(%rsp);
+ movq %rdx, RX0;
+ movq %rcx, RX1;
+ movq %r9, RY0;
+ movq %rsi, RY1;
+
+ /* Load offset */
+ movq (0 * 8)(RX1), RT0;
+ movq (1 * 8)(RX1), RT1;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ movq (RY0), RY2;
+ xorq (0 * 8)(RY2), RT0;
+ xorq (1 * 8)(RY2), RT1;
+ movq (0 * 8)(RX0), RAB0;
+ movq (1 * 8)(RX0), RCD0;
+ /* Store Offset_i */
+ movq RT0, (0 * 8)(RY1);
+ movq RT1, (1 * 8)(RY1);
+ /* CX_i = C_i xor Offset_i */
+ xorq RT0, RAB0;
+ xorq RT1, RCD0;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ movq 8(RY0), RY2;
+ xorq (0 * 8)(RY2), RT0;
+ xorq (1 * 8)(RY2), RT1;
+ movq (2 * 8)(RX0), RAB1;
+ movq (3 * 8)(RX0), RCD1;
+ /* Store Offset_i */
+ movq RT0, (2 * 8)(RY1);
+ movq RT1, (3 * 8)(RY1);
+	/* CX_i = C_i xor Offset_i */
+ xorq RT0, RAB1;
+ xorq RT1, RCD1;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ movq 16(RY0), RY2;
+ xorq (0 * 8)(RY2), RT0;
+ xorq (1 * 8)(RY2), RT1;
+ movq (4 * 8)(RX0), RAB2;
+ movq (5 * 8)(RX0), RCD2;
+ /* Store Offset_i */
+ movq RT0, (4 * 8)(RY1);
+ movq RT1, (5 * 8)(RY1);
+	/* CX_i = C_i xor Offset_i */
+ xorq RT0, RAB2;
+ xorq RT1, RCD2;
+
+ /* Store offset */
+ movq RT0, (0 * 8)(RX1);
+ movq RT1, (1 * 8)(RX1);
+
+ /* PX_i = DECIPHER(K, CX_i) */
+ call __twofish_dec_blk3;
+
+ movq (7 * 8)(%rsp), RX2; /*checksum*/
+ movq (6 * 8)(%rsp), RX1; /*dst*/
+
+ /* Load checksum */
+ movq (0 * 8)(RX2), RT0;
+ movq (1 * 8)(RX2), RT1;
+
+ /* P_i = PX_i xor Offset_i */
+ xorq RCD0, (0 * 8)(RX1);
+ xorq RAB0, (1 * 8)(RX1);
+ xorq RCD1, (2 * 8)(RX1);
+ xorq RAB1, (3 * 8)(RX1);
+ xorq RCD2, (4 * 8)(RX1);
+ xorq RAB2, (5 * 8)(RX1);
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ xorq (0 * 8)(RX1), RT0;
+ xorq (1 * 8)(RX1), RT1;
+ xorq (2 * 8)(RX1), RT0;
+ xorq (3 * 8)(RX1), RT1;
+ xorq (4 * 8)(RX1), RT0;
+ xorq (5 * 8)(RX1), RT1;
+
+ /* Store checksum */
+ movq RT0, (0 * 8)(RX2);
+ movq RT1, (1 * 8)(RX2);
+
+ movq (0 * 8)(%rsp), %rbp;
+ movq (1 * 8)(%rsp), %rbx;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ movq (4 * 8)(%rsp), %r14;
+ movq (5 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
+ addq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-8 * 8);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;)
+
+.align 8
+.globl _gcry_twofish_amd64_ocb_auth
+ELF(.type _gcry_twofish_amd64_ocb_auth,@function;)
+_gcry_twofish_amd64_ocb_auth:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: abuf (3 blocks)
+ * %rdx: offset
+ * %rcx: checksum
+ * %r8 : L pointers (void *L[3])
+ */
+ CFI_STARTPROC();
+ ENTER_SYSV_FUNC_PARAMS_5
+
+ subq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(8 * 8);
+ movq %rbp, (0 * 8)(%rsp);
+ movq %rbx, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ movq %r14, (4 * 8)(%rsp);
+ movq %r15, (5 * 8)(%rsp);
+ CFI_REL_OFFSET(%rbp, 0 * 8);
+ CFI_REL_OFFSET(%rbx, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+ CFI_REL_OFFSET(%r14, 4 * 8);
+ CFI_REL_OFFSET(%r15, 5 * 8);
+
+ movq %rcx, (6 * 8)(%rsp);
+ movq %rsi, RX0;
+ movq %rdx, RX1;
+ movq %r8, RY0;
+
+ /* Load offset */
+ movq (0 * 8)(RX1), RT0;
+ movq (1 * 8)(RX1), RT1;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ movq (RY0), RY2;
+ xorq (0 * 8)(RY2), RT0;
+ xorq (1 * 8)(RY2), RT1;
+ movq (0 * 8)(RX0), RAB0;
+ movq (1 * 8)(RX0), RCD0;
+ /* PX_i = P_i xor Offset_i */
+ xorq RT0, RAB0;
+ xorq RT1, RCD0;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ movq 8(RY0), RY2;
+ xorq (0 * 8)(RY2), RT0;
+ xorq (1 * 8)(RY2), RT1;
+ movq (2 * 8)(RX0), RAB1;
+ movq (3 * 8)(RX0), RCD1;
+ /* PX_i = P_i xor Offset_i */
+ xorq RT0, RAB1;
+ xorq RT1, RCD1;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ movq 16(RY0), RY2;
+ xorq (0 * 8)(RY2), RT0;
+ xorq (1 * 8)(RY2), RT1;
+ movq (4 * 8)(RX0), RAB2;
+ movq (5 * 8)(RX0), RCD2;
+ /* PX_i = P_i xor Offset_i */
+ xorq RT0, RAB2;
+ xorq RT1, RCD2;
+
+ /* Store offset */
+ movq RT0, (0 * 8)(RX1);
+ movq RT1, (1 * 8)(RX1);
+
+ /* C_i = ENCIPHER(K, PX_i) */
+ call __twofish_enc_blk3;
+
+ movq (6 * 8)(%rsp), RX1; /*checksum*/
+
+ /* Checksum_i = C_i xor Checksum_i */
+ xorq RCD0, RCD1;
+ xorq RAB0, RAB1;
+ xorq RCD1, RCD2;
+ xorq RAB1, RAB2;
+ xorq RCD2, (0 * 8)(RX1);
+ xorq RAB2, (1 * 8)(RX1);
+
+ movq (0 * 8)(%rsp), %rbp;
+ movq (1 * 8)(%rsp), %rbx;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ movq (4 * 8)(%rsp), %r14;
+ movq (5 * 8)(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
+ addq $(8 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-8 * 8);
+
+ EXIT_SYSV_FUNC
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_ocb_auth,.-_gcry_twofish_amd64_ocb_auth;)
+
+#endif /*USE_TWOFISH*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/twofish-arm.S b/comm/third_party/libgcrypt/cipher/twofish-arm.S
new file mode 100644
index 0000000000..2e1da6cd15
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/twofish-arm.S
@@ -0,0 +1,363 @@
+/* twofish-arm.S - ARM assembly implementation of Twofish cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+/* structure of TWOFISH_context: */
+#define s0 0
+#define s1 ((s0) + 4 * 256)
+#define s2 ((s1) + 4 * 256)
+#define s3 ((s2) + 4 * 256)
+#define w ((s3) + 4 * 256)
+#define k ((w) + 4 * 8)
+
+/* register macros */
+#define CTX %r0
+#define CTXs0 %r0
+#define CTXs1 %r1
+#define CTXs3 %r7
+
+#define RA %r3
+#define RB %r4
+#define RC %r5
+#define RD %r6
+
+#define RX %r2
+#define RY %ip
+
+#define RMASK %lr
+
+#define RT0 %r8
+#define RT1 %r9
+#define RT2 %r10
+#define RT3 %r11
+
+/* helper macros */
+#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \
+ ldrb rout, [rsrc, #((offs) + 0)]; \
+ ldrb rtmp, [rsrc, #((offs) + 1)]; \
+ orr rout, rout, rtmp, lsl #8; \
+ ldrb rtmp, [rsrc, #((offs) + 2)]; \
+ orr rout, rout, rtmp, lsl #16; \
+ ldrb rtmp, [rsrc, #((offs) + 3)]; \
+ orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \
+ mov rtmp0, rin, lsr #8; \
+ strb rin, [rdst, #((offs) + 0)]; \
+ mov rtmp1, rin, lsr #16; \
+ strb rtmp0, [rdst, #((offs) + 1)]; \
+ mov rtmp0, rin, lsr #24; \
+ strb rtmp1, [rdst, #((offs) + 2)]; \
+ strb rtmp0, [rdst, #((offs) + 3)];
+
+#ifndef __ARMEL__
+ /* bswap on big-endian */
+ #define host_to_le(reg) \
+ rev reg, reg;
+ #define le_to_host(reg) \
+ rev reg, reg;
+#else
+ /* nop on little-endian */
+ #define host_to_le(reg) /*_*/
+ #define le_to_host(reg) /*_*/
+#endif
+
+#define ldr_input_aligned_le(rin, a, b, c, d) \
+ ldr a, [rin, #0]; \
+ ldr b, [rin, #4]; \
+ le_to_host(a); \
+ ldr c, [rin, #8]; \
+ le_to_host(b); \
+ ldr d, [rin, #12]; \
+ le_to_host(c); \
+ le_to_host(d);
+
+#define str_output_aligned_le(rout, a, b, c, d) \
+ le_to_host(a); \
+ le_to_host(b); \
+ str a, [rout, #0]; \
+ le_to_host(c); \
+ str b, [rout, #4]; \
+ le_to_host(d); \
+ str c, [rout, #8]; \
+ str d, [rout, #12];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+ /* unaligned word reads/writes allowed */
+ #define ldr_input_le(rin, ra, rb, rc, rd, rtmp) \
+ ldr_input_aligned_le(rin, ra, rb, rc, rd)
+
+ #define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+ str_output_aligned_le(rout, ra, rb, rc, rd)
+#else
+ /* need to handle unaligned reads/writes by byte reads */
+ #define ldr_input_le(rin, ra, rb, rc, rd, rtmp0) \
+ tst rin, #3; \
+ beq 1f; \
+ ldr_unaligned_le(ra, rin, 0, rtmp0); \
+ ldr_unaligned_le(rb, rin, 4, rtmp0); \
+ ldr_unaligned_le(rc, rin, 8, rtmp0); \
+ ldr_unaligned_le(rd, rin, 12, rtmp0); \
+ b 2f; \
+ 1:;\
+ ldr_input_aligned_le(rin, ra, rb, rc, rd); \
+ 2:;
+
+ #define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+ tst rout, #3; \
+ beq 1f; \
+ str_unaligned_le(ra, rout, 0, rtmp0, rtmp1); \
+ str_unaligned_le(rb, rout, 4, rtmp0, rtmp1); \
+ str_unaligned_le(rc, rout, 8, rtmp0, rtmp1); \
+ str_unaligned_le(rd, rout, 12, rtmp0, rtmp1); \
+ b 2f; \
+ 1:;\
+ str_output_aligned_le(rout, ra, rb, rc, rd); \
+ 2:;
+#endif
+
+/**********************************************************************
+ 1-way twofish
+ **********************************************************************/
+#define encrypt_round(a, b, rc, rd, n, ror_a, adj_a) \
+ and RT0, RMASK, b, lsr#(8 - 2); \
+ and RY, RMASK, b, lsr#(16 - 2); \
+ add RT0, RT0, #(s2 - s1); \
+ and RT1, RMASK, b, lsr#(24 - 2); \
+ ldr RY, [CTXs3, RY]; \
+ and RT2, RMASK, b, lsl#(2); \
+ ldr RT0, [CTXs1, RT0]; \
+ and RT3, RMASK, a, lsr#(16 - 2 + (adj_a)); \
+ ldr RT1, [CTXs0, RT1]; \
+ and RX, RMASK, a, lsr#(8 - 2 + (adj_a)); \
+ ldr RT2, [CTXs1, RT2]; \
+ add RT3, RT3, #(s2 - s1); \
+ ldr RX, [CTXs1, RX]; \
+ ror_a(a); \
+ \
+ eor RY, RY, RT0; \
+ ldr RT3, [CTXs1, RT3]; \
+ and RT0, RMASK, a, lsl#(2); \
+ eor RY, RY, RT1; \
+ and RT1, RMASK, a, lsr#(24 - 2); \
+ eor RY, RY, RT2; \
+ ldr RT0, [CTXs0, RT0]; \
+ eor RX, RX, RT3; \
+ ldr RT1, [CTXs3, RT1]; \
+ eor RX, RX, RT0; \
+ \
+ ldr RT3, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \
+ eor RX, RX, RT1; \
+ ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \
+ \
+ add RT0, RX, RY, lsl #1; \
+ add RX, RX, RY; \
+ add RT0, RT0, RT3; \
+ add RX, RX, RT2; \
+ eor rd, RT0, rd, ror #31; \
+ eor rc, rc, RX;
+
+#define dummy(x) /*_*/
+
+#define ror1(r) \
+ ror r, r, #1;
+
+#define decrypt_round(a, b, rc, rd, n, ror_b, adj_b) \
+ and RT3, RMASK, b, lsl#(2 - (adj_b)); \
+ and RT1, RMASK, b, lsr#(8 - 2 + (adj_b)); \
+ ror_b(b); \
+ and RT2, RMASK, a, lsl#(2); \
+ and RT0, RMASK, a, lsr#(8 - 2); \
+ \
+ ldr RY, [CTXs1, RT3]; \
+ add RT1, RT1, #(s2 - s1); \
+ ldr RX, [CTXs0, RT2]; \
+ and RT3, RMASK, b, lsr#(16 - 2); \
+ ldr RT1, [CTXs1, RT1]; \
+ and RT2, RMASK, a, lsr#(16 - 2); \
+ ldr RT0, [CTXs1, RT0]; \
+ \
+ add RT2, RT2, #(s2 - s1); \
+ ldr RT3, [CTXs3, RT3]; \
+ eor RY, RY, RT1; \
+ \
+ and RT1, RMASK, b, lsr#(24 - 2); \
+ eor RX, RX, RT0; \
+ ldr RT2, [CTXs1, RT2]; \
+ and RT0, RMASK, a, lsr#(24 - 2); \
+ \
+ ldr RT1, [CTXs0, RT1]; \
+ \
+ eor RY, RY, RT3; \
+ ldr RT0, [CTXs3, RT0]; \
+ eor RX, RX, RT2; \
+ eor RY, RY, RT1; \
+ \
+ ldr RT1, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \
+ eor RX, RX, RT0; \
+ ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \
+ \
+ add RT0, RX, RY, lsl #1; \
+ add RX, RX, RY; \
+ add RT0, RT0, RT1; \
+ add RX, RX, RT2; \
+ eor rd, rd, RT0; \
+ eor rc, RX, rc, ror #31;
+
+#define first_encrypt_cycle(nc) \
+ encrypt_round(RA, RB, RC, RD, (nc) * 2, dummy, 0); \
+ encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1);
+
+#define encrypt_cycle(nc) \
+ encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
+ encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1);
+
+#define last_encrypt_cycle(nc) \
+ encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
+ encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
+ ror1(RA);
+
+#define first_decrypt_cycle(nc) \
+ decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, dummy, 0); \
+ decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1);
+
+#define decrypt_cycle(nc) \
+ decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
+ decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1);
+
+#define last_decrypt_cycle(nc) \
+ decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
+ decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
+ ror1(RD);
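+
+/*
+ * The 1-bit rotate that Twofish applies to one of the output words of each
+ * round is mostly deferred here: the following round compensates through
+ * the adj_a/adj_b shift corrections and executes the pending rotate with
+ * ror1(), while the first_*/last_* cycle variants handle the boundaries.
+ */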
+
+.align 3
+.globl _gcry_twofish_arm_encrypt_block
+.type _gcry_twofish_arm_encrypt_block,%function;
+
+_gcry_twofish_arm_encrypt_block:
+ /* input:
+ * %r0: ctx
+ * %r1: dst
+ * %r2: src
+ */
+ push {%r1, %r4-%r11, %ip, %lr};
+
+ add RY, CTXs0, #w;
+
+ ldr_input_le(%r2, RA, RB, RC, RD, RT0);
+
+ /* Input whitening */
+ ldm RY, {RT0, RT1, RT2, RT3};
+ add CTXs3, CTXs0, #(s3 - s0);
+ add CTXs1, CTXs0, #(s1 - s0);
+ mov RMASK, #(0xff << 2);
+ eor RA, RA, RT0;
+ eor RB, RB, RT1;
+ eor RC, RC, RT2;
+ eor RD, RD, RT3;
+
+ first_encrypt_cycle(0);
+ encrypt_cycle(1);
+ encrypt_cycle(2);
+ encrypt_cycle(3);
+ encrypt_cycle(4);
+ encrypt_cycle(5);
+ encrypt_cycle(6);
+ last_encrypt_cycle(7);
+
+ add RY, CTXs3, #(w + 4*4 - s3);
+ pop {%r1}; /* dst */
+
+ /* Output whitening */
+ ldm RY, {RT0, RT1, RT2, RT3};
+ eor RC, RC, RT0;
+ eor RD, RD, RT1;
+ eor RA, RA, RT2;
+ eor RB, RB, RT3;
+
+ str_output_le(%r1, RC, RD, RA, RB, RT0, RT1);
+
+ pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;
+
+.align 3
+.globl _gcry_twofish_arm_decrypt_block
+.type _gcry_twofish_arm_decrypt_block,%function;
+
+_gcry_twofish_arm_decrypt_block:
+ /* input:
+ * %r0: ctx
+ * %r1: dst
+ * %r2: src
+ */
+ push {%r1, %r4-%r11, %ip, %lr};
+
+ add CTXs3, CTXs0, #(s3 - s0);
+
+ ldr_input_le(%r2, RC, RD, RA, RB, RT0);
+
+ add RY, CTXs3, #(w + 4*4 - s3);
+ add CTXs3, CTXs0, #(s3 - s0);
+
+ /* Input whitening */
+ ldm RY, {RT0, RT1, RT2, RT3};
+ add CTXs1, CTXs0, #(s1 - s0);
+ mov RMASK, #(0xff << 2);
+ eor RC, RC, RT0;
+ eor RD, RD, RT1;
+ eor RA, RA, RT2;
+ eor RB, RB, RT3;
+
+ first_decrypt_cycle(7);
+ decrypt_cycle(6);
+ decrypt_cycle(5);
+ decrypt_cycle(4);
+ decrypt_cycle(3);
+ decrypt_cycle(2);
+ decrypt_cycle(1);
+ last_decrypt_cycle(0);
+
+ add RY, CTXs0, #w;
+ pop {%r1}; /* dst */
+
+ /* Output whitening */
+ ldm RY, {RT0, RT1, RT2, RT3};
+ eor RA, RA, RT0;
+ eor RB, RB, RT1;
+ eor RC, RC, RT2;
+ eor RD, RD, RT3;
+
+ str_output_le(%r1, RA, RB, RC, RD, RT0, RT1);
+
+ pop {%r4-%r11, %ip, %pc};
+.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;
+
+#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/
+#endif /*__ARMEL__*/
diff --git a/comm/third_party/libgcrypt/cipher/twofish-avx2-amd64.S b/comm/third_party/libgcrypt/cipher/twofish-avx2-amd64.S
new file mode 100644
index 0000000000..74cad35589
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/twofish-avx2-amd64.S
@@ -0,0 +1,1048 @@
+/* twofish-avx2-amd64.S - AMD64/AVX2 assembly implementation of Twofish cipher
+ *
+ * Copyright (C) 2013-2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH) && \
+ defined(ENABLE_AVX2_SUPPORT)
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* structure of TWOFISH_context: */
+#define s0 0
+#define s1 ((s0) + 4 * 256)
+#define s2 ((s1) + 4 * 256)
+#define s3 ((s2) + 4 * 256)
+#define w ((s3) + 4 * 256)
+#define k ((w) + 4 * 8)
+
+/* register macros */
+#define CTX %rdi
+
+#define RROUND %rbp
+#define RROUNDd %ebp
+#define RS0 CTX
+#define RS1 %r8
+#define RS2 %r9
+#define RS3 %r10
+#define RK %r11
+#define RW %rax
+
+#define RA0 %ymm8
+#define RB0 %ymm9
+#define RC0 %ymm10
+#define RD0 %ymm11
+#define RA1 %ymm12
+#define RB1 %ymm13
+#define RC1 %ymm14
+#define RD1 %ymm15
+
+/* temp regs */
+#define RX0 %ymm0
+#define RY0 %ymm1
+#define RX1 %ymm2
+#define RY1 %ymm3
+#define RT0 %ymm4
+#define RIDX %ymm5
+
+#define RX0x %xmm0
+#define RY0x %xmm1
+#define RX1x %xmm2
+#define RY1x %xmm3
+#define RT0x %xmm4
+#define RIDXx %xmm5
+
+#define RTMP0 RX0
+#define RTMP0x RX0x
+#define RTMP1 RX1
+#define RTMP1x RX1x
+#define RTMP2 RY0
+#define RTMP2x RY0x
+#define RTMP3 RY1
+#define RTMP3x RY1x
+#define RTMP4 RIDX
+#define RTMP4x RIDXx
+
+/* vpgatherdd mask and '-1' */
+#define RNOT %ymm6
+#define RNOTx %xmm6
+
+/* byte mask, (-1 >> 24) */
+#define RBYTE %ymm7
+
+/**********************************************************************
+ 16-way AVX2 twofish
+ **********************************************************************/
+#define init_round_constants() \
+ vpcmpeqd RNOT, RNOT, RNOT; \
+ leaq k(CTX), RK; \
+ leaq w(CTX), RW; \
+ vpsrld $24, RNOT, RBYTE; \
+ leaq s1(CTX), RS1; \
+ leaq s2(CTX), RS2; \
+ leaq s3(CTX), RS3; \
+
+#define g16(ab, rs0, rs1, rs2, rs3, xy) \
+ vpand RBYTE, ab ## 0, RIDX; \
+ vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; \
+ vpcmpeqd RNOT, RNOT, RNOT; \
+ \
+ vpand RBYTE, ab ## 1, RIDX; \
+ vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \
+ vpcmpeqd RNOT, RNOT, RNOT; \
+ \
+ vpsrld $8, ab ## 0, RIDX; \
+ vpand RBYTE, RIDX, RIDX; \
+ vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
+ vpcmpeqd RNOT, RNOT, RNOT; \
+ vpxor RT0, xy ## 0, xy ## 0; \
+ \
+ vpsrld $8, ab ## 1, RIDX; \
+ vpand RBYTE, RIDX, RIDX; \
+ vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
+ vpcmpeqd RNOT, RNOT, RNOT; \
+ vpxor RT0, xy ## 1, xy ## 1; \
+ \
+ vpsrld $16, ab ## 0, RIDX; \
+ vpand RBYTE, RIDX, RIDX; \
+ vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
+ vpcmpeqd RNOT, RNOT, RNOT; \
+ vpxor RT0, xy ## 0, xy ## 0; \
+ \
+ vpsrld $16, ab ## 1, RIDX; \
+ vpand RBYTE, RIDX, RIDX; \
+ vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
+ vpcmpeqd RNOT, RNOT, RNOT; \
+ vpxor RT0, xy ## 1, xy ## 1; \
+ \
+ vpsrld $24, ab ## 0, RIDX; \
+ vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
+ vpcmpeqd RNOT, RNOT, RNOT; \
+ vpxor RT0, xy ## 0, xy ## 0; \
+ \
+ vpsrld $24, ab ## 1, RIDX; \
+ vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
+ vpcmpeqd RNOT, RNOT, RNOT; \
+ vpxor RT0, xy ## 1, xy ## 1;
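+
+/*
+ * Note: vpgatherdd clears its mask register, so RNOT has to be reset to
+ * all-ones with vpcmpeqd after every gather above before it can serve as
+ * the mask for the next lookup.
+ */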
+
+#define g1_16(a, x) \
+ g16(a, RS0, RS1, RS2, RS3, x);
+
+#define g2_16(b, y) \
+ g16(b, RS1, RS2, RS3, RS0, y);
+
+#define encrypt_round_end16(a, b, c, d, nk, r) \
+ vpaddd RY0, RX0, RX0; \
+ vpaddd RX0, RY0, RY0; \
+ vpbroadcastd ((nk)+((r)*8))(RK), RT0; \
+ vpaddd RT0, RX0, RX0; \
+ vpbroadcastd 4+((nk)+((r)*8))(RK), RT0; \
+ vpaddd RT0, RY0, RY0; \
+ \
+ vpxor RY0, d ## 0, d ## 0; \
+ \
+ vpxor RX0, c ## 0, c ## 0; \
+ vpsrld $1, c ## 0, RT0; \
+ vpslld $31, c ## 0, c ## 0; \
+ vpor RT0, c ## 0, c ## 0; \
+ \
+ vpaddd RY1, RX1, RX1; \
+ vpaddd RX1, RY1, RY1; \
+ vpbroadcastd ((nk)+((r)*8))(RK), RT0; \
+ vpaddd RT0, RX1, RX1; \
+ vpbroadcastd 4+((nk)+((r)*8))(RK), RT0; \
+ vpaddd RT0, RY1, RY1; \
+ \
+ vpxor RY1, d ## 1, d ## 1; \
+ \
+ vpxor RX1, c ## 1, c ## 1; \
+ vpsrld $1, c ## 1, RT0; \
+ vpslld $31, c ## 1, c ## 1; \
+ vpor RT0, c ## 1, c ## 1; \
+
+#define encrypt_round16(a, b, c, d, nk, r) \
+ g2_16(b, RY); \
+ \
+ vpslld $1, b ## 0, RT0; \
+ vpsrld $31, b ## 0, b ## 0; \
+ vpor RT0, b ## 0, b ## 0; \
+ \
+ vpslld $1, b ## 1, RT0; \
+ vpsrld $31, b ## 1, b ## 1; \
+ vpor RT0, b ## 1, b ## 1; \
+ \
+ g1_16(a, RX); \
+ \
+ encrypt_round_end16(a, b, c, d, nk, r);
+
+#define encrypt_round_first16(a, b, c, d, nk, r) \
+ vpslld $1, d ## 0, RT0; \
+ vpsrld $31, d ## 0, d ## 0; \
+ vpor RT0, d ## 0, d ## 0; \
+ \
+ vpslld $1, d ## 1, RT0; \
+ vpsrld $31, d ## 1, d ## 1; \
+ vpor RT0, d ## 1, d ## 1; \
+ \
+ encrypt_round16(a, b, c, d, nk, r);
+
+#define encrypt_round_last16(a, b, c, d, nk, r) \
+ g2_16(b, RY); \
+ \
+ g1_16(a, RX); \
+ \
+ encrypt_round_end16(a, b, c, d, nk, r);
+
+#define decrypt_round_end16(a, b, c, d, nk, r) \
+ vpaddd RY0, RX0, RX0; \
+ vpaddd RX0, RY0, RY0; \
+ vpbroadcastd ((nk)+((r)*8))(RK), RT0; \
+ vpaddd RT0, RX0, RX0; \
+ vpbroadcastd 4+((nk)+((r)*8))(RK), RT0; \
+ vpaddd RT0, RY0, RY0; \
+ \
+ vpxor RX0, c ## 0, c ## 0; \
+ \
+ vpxor RY0, d ## 0, d ## 0; \
+ vpsrld $1, d ## 0, RT0; \
+ vpslld $31, d ## 0, d ## 0; \
+ vpor RT0, d ## 0, d ## 0; \
+ \
+ vpaddd RY1, RX1, RX1; \
+ vpaddd RX1, RY1, RY1; \
+ vpbroadcastd ((nk)+((r)*8))(RK), RT0; \
+ vpaddd RT0, RX1, RX1; \
+ vpbroadcastd 4+((nk)+((r)*8))(RK), RT0; \
+ vpaddd RT0, RY1, RY1; \
+ \
+ vpxor RX1, c ## 1, c ## 1; \
+ \
+ vpxor RY1, d ## 1, d ## 1; \
+ vpsrld $1, d ## 1, RT0; \
+ vpslld $31, d ## 1, d ## 1; \
+ vpor RT0, d ## 1, d ## 1;
+
+#define decrypt_round16(a, b, c, d, nk, r) \
+ g1_16(a, RX); \
+ \
+ vpslld $1, a ## 0, RT0; \
+ vpsrld $31, a ## 0, a ## 0; \
+ vpor RT0, a ## 0, a ## 0; \
+ \
+ vpslld $1, a ## 1, RT0; \
+ vpsrld $31, a ## 1, a ## 1; \
+ vpor RT0, a ## 1, a ## 1; \
+ \
+ g2_16(b, RY); \
+ \
+ decrypt_round_end16(a, b, c, d, nk, r);
+
+#define decrypt_round_first16(a, b, c, d, nk, r) \
+ vpslld $1, c ## 0, RT0; \
+ vpsrld $31, c ## 0, c ## 0; \
+ vpor RT0, c ## 0, c ## 0; \
+ \
+ vpslld $1, c ## 1, RT0; \
+ vpsrld $31, c ## 1, c ## 1; \
+ vpor RT0, c ## 1, c ## 1; \
+ \
+ decrypt_round16(a, b, c, d, nk, r)
+
+#define decrypt_round_last16(a, b, c, d, nk, r) \
+ g1_16(a, RX); \
+ \
+ g2_16(b, RY); \
+ \
+ decrypt_round_end16(a, b, c, d, nk, r);
+
+#define encrypt_cycle16(r) \
+ encrypt_round16(RA, RB, RC, RD, 0, r); \
+ encrypt_round16(RC, RD, RA, RB, 8, r);
+
+#define encrypt_cycle_first16(r) \
+ encrypt_round_first16(RA, RB, RC, RD, 0, r); \
+ encrypt_round16(RC, RD, RA, RB, 8, r);
+
+#define encrypt_cycle_last16(r) \
+ encrypt_round16(RA, RB, RC, RD, 0, r); \
+ encrypt_round_last16(RC, RD, RA, RB, 8, r);
+
+#define decrypt_cycle16(r) \
+ decrypt_round16(RC, RD, RA, RB, 8, r); \
+ decrypt_round16(RA, RB, RC, RD, 0, r);
+
+#define decrypt_cycle_first16(r) \
+ decrypt_round_first16(RC, RD, RA, RB, 8, r); \
+ decrypt_round16(RA, RB, RC, RD, 0, r);
+
+#define decrypt_cycle_last16(r) \
+ decrypt_round16(RC, RD, RA, RB, 8, r); \
+ decrypt_round_last16(RA, RB, RC, RD, 0, r);
+
+#define transpose_4x4(x0,x1,x2,x3,t1,t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
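+
+/*
+ * transpose_4x4 transposes a 4x4 matrix of 32-bit words held across
+ * x0..x3 (per 128-bit lane), so that afterwards each register holds the
+ * same word position from four different blocks.
+ */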
+
+#define read_blocks8(offs,a,b,c,d) \
+ vmovdqu 16*offs(RIO), a; \
+ vmovdqu 16*offs+32(RIO), b; \
+ vmovdqu 16*offs+64(RIO), c; \
+ vmovdqu 16*offs+96(RIO), d; \
+ \
+ transpose_4x4(a, b, c, d, RX0, RY0);
+
+#define write_blocks8(offs,a,b,c,d) \
+ transpose_4x4(a, b, c, d, RX0, RY0); \
+ \
+ vmovdqu a, 16*offs(RIO); \
+ vmovdqu b, 16*offs+32(RIO); \
+ vmovdqu c, 16*offs+64(RIO); \
+ vmovdqu d, 16*offs+96(RIO);
+
+#define inpack_enc8(a,b,c,d) \
+ vpbroadcastd 4*0(RW), RT0; \
+ vpxor RT0, a, a; \
+ \
+ vpbroadcastd 4*1(RW), RT0; \
+ vpxor RT0, b, b; \
+ \
+ vpbroadcastd 4*2(RW), RT0; \
+ vpxor RT0, c, c; \
+ \
+ vpbroadcastd 4*3(RW), RT0; \
+ vpxor RT0, d, d;
+
+#define outunpack_enc8(a,b,c,d) \
+ vpbroadcastd 4*4(RW), RX0; \
+ vpbroadcastd 4*5(RW), RY0; \
+ vpxor RX0, c, RX0; \
+ vpxor RY0, d, RY0; \
+ \
+ vpbroadcastd 4*6(RW), RT0; \
+ vpxor RT0, a, c; \
+ vpbroadcastd 4*7(RW), RT0; \
+ vpxor RT0, b, d; \
+ \
+ vmovdqa RX0, a; \
+ vmovdqa RY0, b;
+
+#define inpack_dec8(a,b,c,d) \
+ vpbroadcastd 4*4(RW), RX0; \
+ vpbroadcastd 4*5(RW), RY0; \
+ vpxor RX0, a, RX0; \
+ vpxor RY0, b, RY0; \
+ \
+ vpbroadcastd 4*6(RW), RT0; \
+ vpxor RT0, c, a; \
+ vpbroadcastd 4*7(RW), RT0; \
+ vpxor RT0, d, b; \
+ \
+ vmovdqa RX0, c; \
+ vmovdqa RY0, d;
+
+#define outunpack_dec8(a,b,c,d) \
+ vpbroadcastd 4*0(RW), RT0; \
+ vpxor RT0, a, a; \
+ \
+ vpbroadcastd 4*1(RW), RT0; \
+ vpxor RT0, b, b; \
+ \
+ vpbroadcastd 4*2(RW), RT0; \
+ vpxor RT0, c, c; \
+ \
+ vpbroadcastd 4*3(RW), RT0; \
+ vpxor RT0, d, d;
+
+#define transpose4x4_16(a,b,c,d) \
+ transpose_4x4(a ## 0, b ## 0, c ## 0, d ## 0, RX0, RY0); \
+ transpose_4x4(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0);
+
+#define inpack_enc16(a,b,c,d) \
+ inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
+ inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
+
+#define outunpack_enc16(a,b,c,d) \
+ outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
+ outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
+
+#define inpack_dec16(a,b,c,d) \
+ inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
+ inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
+
+#define outunpack_dec16(a,b,c,d) \
+ outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
+ outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
+
+.align 8
+ELF(.type __twofish_enc_blk16,@function;)
+__twofish_enc_blk16:
+ /* input:
+ * %rdi: ctx, CTX
+ * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel
+ * plaintext blocks
+ * output:
+ * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel
+ * ciphertext blocks
+ */
+ CFI_STARTPROC();
+ init_round_constants();
+
+ transpose4x4_16(RA, RB, RC, RD);
+ inpack_enc16(RA, RB, RC, RD);
+
+ encrypt_cycle_first16(0);
+ encrypt_cycle16(2);
+ encrypt_cycle16(4);
+ encrypt_cycle16(6);
+ encrypt_cycle16(8);
+ encrypt_cycle16(10);
+ encrypt_cycle16(12);
+ encrypt_cycle_last16(14);
+
+ outunpack_enc16(RA, RB, RC, RD);
+ transpose4x4_16(RA, RB, RC, RD);
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __twofish_enc_blk16,.-__twofish_enc_blk16;)
+
+.align 8
+ELF(.type __twofish_dec_blk16,@function;)
+__twofish_dec_blk16:
+ /* input:
+ * %rdi: ctx, CTX
+ * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel
+	 *						ciphertext blocks
+ * output:
+ * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel
+	 *						plaintext blocks
+ */
+ CFI_STARTPROC();
+ init_round_constants();
+
+ transpose4x4_16(RA, RB, RC, RD);
+ inpack_dec16(RA, RB, RC, RD);
+
+ decrypt_cycle_first16(14);
+ decrypt_cycle16(12);
+ decrypt_cycle16(10);
+ decrypt_cycle16(8);
+ decrypt_cycle16(6);
+ decrypt_cycle16(4);
+ decrypt_cycle16(2);
+ decrypt_cycle_last16(0);
+
+ outunpack_dec16(RA, RB, RC, RD);
+ transpose4x4_16(RA, RB, RC, RD);
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __twofish_dec_blk16,.-__twofish_dec_blk16;)
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
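+
+/*
+ * inc_le128 expects 'minus_one' to hold -1 only in the low qword of each
+ * 128-bit lane (as set up in the callers below): the vpsubq then adds 1 to
+ * the low qword, and the vpcmpeqq/vpslldq pair propagates a carry into the
+ * high qword when the low qword wraps from all-ones to zero.
+ */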
+
+.align 8
+.globl _gcry_twofish_avx2_ctr_enc
+ELF(.type _gcry_twofish_avx2_ctr_enc,@function;)
+_gcry_twofish_avx2_ctr_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ CFI_STARTPROC();
+
+ movq 8(%rcx), %rax;
+ bswapq %rax;
+
+ vzeroupper;
+
+ vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
+ vpcmpeqd RNOT, RNOT, RNOT;
+ vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */
+ vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
+
+ /* load IV and byteswap */
+ vmovdqu (%rcx), RTMP4x;
+ vpshufb RTMP3x, RTMP4x, RTMP4x;
+ vmovdqa RTMP4x, RTMP0x;
+ inc_le128(RTMP4x, RNOTx, RTMP1x);
+ vinserti128 $1, RTMP4x, RTMP0, RTMP0;
+ vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
+
+ /* check need for handling 64-bit overflow and carry */
+ cmpq $(0xffffffffffffffff - 16), %rax;
+ ja .Lhandle_ctr_carry;
+
+ /* construct IVs */
+ vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
+ vpshufb RTMP3, RTMP0, RB0;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
+ vpshufb RTMP3, RTMP0, RC0;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
+ vpshufb RTMP3, RTMP0, RD0;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
+ vpshufb RTMP3, RTMP0, RA1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
+ vpshufb RTMP3, RTMP0, RB1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
+ vpshufb RTMP3, RTMP0, RC1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
+ vpshufb RTMP3, RTMP0, RD1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
+ vpshufb RTMP3x, RTMP0x, RTMP0x;
+
+ jmp .Lctr_carry_done;
+
+.Lhandle_ctr_carry:
+ /* construct IVs */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB0; /* +3 ; +2 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RC0; /* +5 ; +4 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RD0; /* +7 ; +6 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA1; /* +9 ; +8 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RC1; /* +13 ; +12 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RD1; /* +15 ; +14 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vextracti128 $1, RTMP0, RTMP0x;
+ vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */
+
+.align 4
+.Lctr_carry_done:
+ /* store new IV */
+ vmovdqu RTMP0x, (%rcx);
+
+ call __twofish_enc_blk16;
+
+ vpxor (0 * 32)(%rdx), RA0, RA0;
+ vpxor (1 * 32)(%rdx), RB0, RB0;
+ vpxor (2 * 32)(%rdx), RC0, RC0;
+ vpxor (3 * 32)(%rdx), RD0, RD0;
+ vpxor (4 * 32)(%rdx), RA1, RA1;
+ vpxor (5 * 32)(%rdx), RB1, RB1;
+ vpxor (6 * 32)(%rdx), RC1, RC1;
+ vpxor (7 * 32)(%rdx), RD1, RD1;
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RB0, (1 * 32)(%rsi);
+ vmovdqu RC0, (2 * 32)(%rsi);
+ vmovdqu RD0, (3 * 32)(%rsi);
+ vmovdqu RA1, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RC1, (6 * 32)(%rsi);
+ vmovdqu RD1, (7 * 32)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_avx2_ctr_enc,.-_gcry_twofish_avx2_ctr_enc;)
+
+.align 8
+.globl _gcry_twofish_avx2_cbc_dec
+ELF(.type _gcry_twofish_avx2_cbc_dec,@function;)
+_gcry_twofish_avx2_cbc_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ vmovdqu (0 * 32)(%rdx), RA0;
+ vmovdqu (1 * 32)(%rdx), RB0;
+ vmovdqu (2 * 32)(%rdx), RC0;
+ vmovdqu (3 * 32)(%rdx), RD0;
+ vmovdqu (4 * 32)(%rdx), RA1;
+ vmovdqu (5 * 32)(%rdx), RB1;
+ vmovdqu (6 * 32)(%rdx), RC1;
+ vmovdqu (7 * 32)(%rdx), RD1;
+
+ call __twofish_dec_blk16;
+
+ vmovdqu (%rcx), RNOTx;
+ vinserti128 $1, (%rdx), RNOT, RNOT;
+ vpxor RNOT, RA0, RA0;
+ vpxor (0 * 32 + 16)(%rdx), RB0, RB0;
+ vpxor (1 * 32 + 16)(%rdx), RC0, RC0;
+ vpxor (2 * 32 + 16)(%rdx), RD0, RD0;
+ vpxor (3 * 32 + 16)(%rdx), RA1, RA1;
+ vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
+ vpxor (5 * 32 + 16)(%rdx), RC1, RC1;
+ vpxor (6 * 32 + 16)(%rdx), RD1, RD1;
+ vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+ vmovdqu RNOTx, (%rcx); /* store new IV */
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RB0, (1 * 32)(%rsi);
+ vmovdqu RC0, (2 * 32)(%rsi);
+ vmovdqu RD0, (3 * 32)(%rsi);
+ vmovdqu RA1, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RC1, (6 * 32)(%rsi);
+ vmovdqu RD1, (7 * 32)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_avx2_cbc_dec,.-_gcry_twofish_avx2_cbc_dec;)
+
+.align 8
+.globl _gcry_twofish_avx2_cfb_dec
+ELF(.type _gcry_twofish_avx2_cfb_dec,@function;)
+_gcry_twofish_avx2_cfb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ /* Load input */
+ vmovdqu (%rcx), RNOTx;
+ vinserti128 $1, (%rdx), RNOT, RA0;
+ vmovdqu (0 * 32 + 16)(%rdx), RB0;
+ vmovdqu (1 * 32 + 16)(%rdx), RC0;
+ vmovdqu (2 * 32 + 16)(%rdx), RD0;
+ vmovdqu (3 * 32 + 16)(%rdx), RA1;
+ vmovdqu (4 * 32 + 16)(%rdx), RB1;
+ vmovdqu (5 * 32 + 16)(%rdx), RC1;
+ vmovdqu (6 * 32 + 16)(%rdx), RD1;
+
+ /* Update IV */
+ vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+ vmovdqu RNOTx, (%rcx);
+
+ call __twofish_enc_blk16;
+
+ vpxor (0 * 32)(%rdx), RA0, RA0;
+ vpxor (1 * 32)(%rdx), RB0, RB0;
+ vpxor (2 * 32)(%rdx), RC0, RC0;
+ vpxor (3 * 32)(%rdx), RD0, RD0;
+ vpxor (4 * 32)(%rdx), RA1, RA1;
+ vpxor (5 * 32)(%rdx), RB1, RB1;
+ vpxor (6 * 32)(%rdx), RC1, RC1;
+ vpxor (7 * 32)(%rdx), RD1, RD1;
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RB0, (1 * 32)(%rsi);
+ vmovdqu RC0, (2 * 32)(%rsi);
+ vmovdqu RD0, (3 * 32)(%rsi);
+ vmovdqu RA1, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RC1, (6 * 32)(%rsi);
+ vmovdqu RD1, (7 * 32)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_avx2_cfb_dec,.-_gcry_twofish_avx2_cfb_dec;)
+
+.align 8
+.globl _gcry_twofish_avx2_ocb_enc
+ELF(.type _gcry_twofish_avx2_ocb_enc,@function;)
+
+_gcry_twofish_avx2_ocb_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[16])
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ vmovdqu (%rcx), RTMP0x;
+ vmovdqu (%r8), RTMP1x;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+ vmovdqu (n * 32)(%rdx), yreg; \
+ vpxor (l0reg), RTMP0x, RNOTx; \
+ vpxor (l1reg), RNOTx, RTMP0x; \
+ vinserti128 $1, RTMP0x, RNOT, RNOT; \
+ vpxor yreg, RTMP1, RTMP1; \
+ vpxor yreg, RNOT, yreg; \
+ vmovdqu RNOT, (n * 32)(%rsi);
+
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, %r11, RA0);
+ OCB_INPUT(1, %r12, %r13, RB0);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(2, %r10, %r11, RC0);
+ OCB_INPUT(3, %r12, %r13, RD0);
+ movq (8 * 8)(%r9), %r10;
+ movq (9 * 8)(%r9), %r11;
+ movq (10 * 8)(%r9), %r12;
+ movq (11 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, %r11, RA1);
+ OCB_INPUT(5, %r12, %r13, RB1);
+ movq (12 * 8)(%r9), %r10;
+ movq (13 * 8)(%r9), %r11;
+ movq (14 * 8)(%r9), %r12;
+ movq (15 * 8)(%r9), %r13;
+ OCB_INPUT(6, %r10, %r11, RC1);
+ OCB_INPUT(7, %r12, %r13, RD1);
+#undef OCB_INPUT
+
+ vextracti128 $1, RTMP1, RNOTx;
+ vmovdqu RTMP0x, (%rcx);
+ vpxor RNOTx, RTMP1x, RTMP1x;
+ vmovdqu RTMP1x, (%r8);
+
+ movq (0 * 8)(%rsp), %r10;
+ movq (1 * 8)(%rsp), %r11;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ call __twofish_enc_blk16;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ vpxor (0 * 32)(%rsi), RA0, RA0;
+ vpxor (1 * 32)(%rsi), RB0, RB0;
+ vpxor (2 * 32)(%rsi), RC0, RC0;
+ vpxor (3 * 32)(%rsi), RD0, RD0;
+ vpxor (4 * 32)(%rsi), RA1, RA1;
+ vpxor (5 * 32)(%rsi), RB1, RB1;
+ vpxor (6 * 32)(%rsi), RC1, RC1;
+ vpxor (7 * 32)(%rsi), RD1, RD1;
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RB0, (1 * 32)(%rsi);
+ vmovdqu RC0, (2 * 32)(%rsi);
+ vmovdqu RD0, (3 * 32)(%rsi);
+ vmovdqu RA1, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RC1, (6 * 32)(%rsi);
+ vmovdqu RD1, (7 * 32)(%rsi);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_avx2_ocb_enc,.-_gcry_twofish_avx2_ocb_enc;)
+
+.align 8
+.globl _gcry_twofish_avx2_ocb_dec
+ELF(.type _gcry_twofish_avx2_ocb_dec,@function;)
+
+_gcry_twofish_avx2_ocb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: offset
+ * %r8 : checksum
+ * %r9 : L pointers (void *L[16])
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ vmovdqu (%rcx), RTMP0x;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+ vmovdqu (n * 32)(%rdx), yreg; \
+ vpxor (l0reg), RTMP0x, RNOTx; \
+ vpxor (l1reg), RNOTx, RTMP0x; \
+ vinserti128 $1, RTMP0x, RNOT, RNOT; \
+ vpxor yreg, RNOT, yreg; \
+ vmovdqu RNOT, (n * 32)(%rsi);
+
+ movq (0 * 8)(%r9), %r10;
+ movq (1 * 8)(%r9), %r11;
+ movq (2 * 8)(%r9), %r12;
+ movq (3 * 8)(%r9), %r13;
+ OCB_INPUT(0, %r10, %r11, RA0);
+ OCB_INPUT(1, %r12, %r13, RB0);
+ movq (4 * 8)(%r9), %r10;
+ movq (5 * 8)(%r9), %r11;
+ movq (6 * 8)(%r9), %r12;
+ movq (7 * 8)(%r9), %r13;
+ OCB_INPUT(2, %r10, %r11, RC0);
+ OCB_INPUT(3, %r12, %r13, RD0);
+ movq (8 * 8)(%r9), %r10;
+ movq (9 * 8)(%r9), %r11;
+ movq (10 * 8)(%r9), %r12;
+ movq (11 * 8)(%r9), %r13;
+ OCB_INPUT(4, %r10, %r11, RA1);
+ OCB_INPUT(5, %r12, %r13, RB1);
+ movq (12 * 8)(%r9), %r10;
+ movq (13 * 8)(%r9), %r11;
+ movq (14 * 8)(%r9), %r12;
+ movq (15 * 8)(%r9), %r13;
+ OCB_INPUT(6, %r10, %r11, RC1);
+ OCB_INPUT(7, %r12, %r13, RD1);
+#undef OCB_INPUT
+
+ vmovdqu RTMP0x, (%rcx);
+	movq %r8, %rcx;
+
+ movq (0 * 8)(%rsp), %r10;
+ movq (1 * 8)(%rsp), %r11;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ call __twofish_dec_blk16;
+
+ vmovdqu (%rcx), RTMP1x;
+
+ vpxor (0 * 32)(%rsi), RA0, RA0;
+ vpxor (1 * 32)(%rsi), RB0, RB0;
+ vpxor (2 * 32)(%rsi), RC0, RC0;
+ vpxor (3 * 32)(%rsi), RD0, RD0;
+ vpxor (4 * 32)(%rsi), RA1, RA1;
+ vpxor (5 * 32)(%rsi), RB1, RB1;
+ vpxor (6 * 32)(%rsi), RC1, RC1;
+ vpxor (7 * 32)(%rsi), RD1, RD1;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vpxor RA0, RTMP1, RTMP1;
+ vmovdqu RB0, (1 * 32)(%rsi);
+ vpxor RB0, RTMP1, RTMP1;
+ vmovdqu RC0, (2 * 32)(%rsi);
+ vpxor RC0, RTMP1, RTMP1;
+ vmovdqu RD0, (3 * 32)(%rsi);
+ vpxor RD0, RTMP1, RTMP1;
+ vmovdqu RA1, (4 * 32)(%rsi);
+ vpxor RA1, RTMP1, RTMP1;
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vpxor RB1, RTMP1, RTMP1;
+ vmovdqu RC1, (6 * 32)(%rsi);
+ vpxor RC1, RTMP1, RTMP1;
+ vmovdqu RD1, (7 * 32)(%rsi);
+ vpxor RD1, RTMP1, RTMP1;
+
+ vextracti128 $1, RTMP1, RNOTx;
+ vpxor RNOTx, RTMP1x, RTMP1x;
+ vmovdqu RTMP1x, (%rcx);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_avx2_ocb_dec,.-_gcry_twofish_avx2_ocb_dec;)
+
+.align 8
+.globl _gcry_twofish_avx2_ocb_auth
+ELF(.type _gcry_twofish_avx2_ocb_auth,@function;)
+
+_gcry_twofish_avx2_ocb_auth:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: abuf (16 blocks)
+ * %rdx: offset
+ * %rcx: checksum
+ * %r8 : L pointers (void *L[16])
+ */
+ CFI_STARTPROC();
+
+ vzeroupper;
+
+ subq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+ movq %r10, (0 * 8)(%rsp);
+ movq %r11, (1 * 8)(%rsp);
+ movq %r12, (2 * 8)(%rsp);
+ movq %r13, (3 * 8)(%rsp);
+ CFI_REL_OFFSET(%r10, 0 * 8);
+ CFI_REL_OFFSET(%r11, 1 * 8);
+ CFI_REL_OFFSET(%r12, 2 * 8);
+ CFI_REL_OFFSET(%r13, 3 * 8);
+
+ vmovdqu (%rdx), RTMP0x;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+ vmovdqu (n * 32)(%rsi), yreg; \
+ vpxor (l0reg), RTMP0x, RNOTx; \
+ vpxor (l1reg), RNOTx, RTMP0x; \
+ vinserti128 $1, RTMP0x, RNOT, RNOT; \
+ vpxor yreg, RNOT, yreg;
+
+ movq (0 * 8)(%r8), %r10;
+ movq (1 * 8)(%r8), %r11;
+ movq (2 * 8)(%r8), %r12;
+ movq (3 * 8)(%r8), %r13;
+ OCB_INPUT(0, %r10, %r11, RA0);
+ OCB_INPUT(1, %r12, %r13, RB0);
+ movq (4 * 8)(%r8), %r10;
+ movq (5 * 8)(%r8), %r11;
+ movq (6 * 8)(%r8), %r12;
+ movq (7 * 8)(%r8), %r13;
+ OCB_INPUT(2, %r10, %r11, RC0);
+ OCB_INPUT(3, %r12, %r13, RD0);
+ movq (8 * 8)(%r8), %r10;
+ movq (9 * 8)(%r8), %r11;
+ movq (10 * 8)(%r8), %r12;
+ movq (11 * 8)(%r8), %r13;
+ OCB_INPUT(4, %r10, %r11, RA1);
+ OCB_INPUT(5, %r12, %r13, RB1);
+ movq (12 * 8)(%r8), %r10;
+ movq (13 * 8)(%r8), %r11;
+ movq (14 * 8)(%r8), %r12;
+ movq (15 * 8)(%r8), %r13;
+ OCB_INPUT(6, %r10, %r11, RC1);
+ OCB_INPUT(7, %r12, %r13, RD1);
+#undef OCB_INPUT
+
+ vmovdqu RTMP0x, (%rdx);
+
+ movq (0 * 8)(%rsp), %r10;
+ movq (1 * 8)(%rsp), %r11;
+ movq (2 * 8)(%rsp), %r12;
+ movq (3 * 8)(%rsp), %r13;
+ CFI_RESTORE(%r10);
+ CFI_RESTORE(%r11);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+
+ call __twofish_enc_blk16;
+
+ vpxor RA0, RB0, RA0;
+ vpxor RC0, RD0, RC0;
+ vpxor RA1, RB1, RA1;
+ vpxor RC1, RD1, RC1;
+
+ vpxor RA0, RC0, RA0;
+ vpxor RA1, RC1, RA1;
+
+ addq $(4 * 8), %rsp;
+ CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+ vpxor RA1, RA0, RTMP1;
+
+ vextracti128 $1, RTMP1, RNOTx;
+ vpxor (%rcx), RTMP1x, RTMP1x;
+ vpxor RNOTx, RTMP1x, RTMP1x;
+ vmovdqu RTMP1x, (%rcx);
+
+ vzeroall;
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_twofish_avx2_ocb_auth,.-_gcry_twofish_avx2_ocb_auth;)
+
+.align 16
+
+/* For CTR-mode IV byteswap */
+ _gcry_twofish_bswap128_mask:
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+ELF(.size _gcry_twofish_bswap128_mask,.-_gcry_twofish_bswap128_mask;)
+
+#endif /*defined(USE_TWOFISH) && defined(ENABLE_AVX2_SUPPORT)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/twofish.c b/comm/third_party/libgcrypt/cipher/twofish.c
new file mode 100644
index 0000000000..d19e079046
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/twofish.c
@@ -0,0 +1,1793 @@
+/* Twofish for GPG
+ * Copyright (C) 1998, 2002, 2003 Free Software Foundation, Inc.
+ * Written by Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
+ * 256-bit key length added March 20, 1999
+ * Some modifications to reduce the text size by Werner Koch, April, 1998
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ ********************************************************************
+ *
+ * This code is a "clean room" implementation, written from the paper
+ * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
+ * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
+ * through http://www.counterpane.com/twofish.html
+ *
+ * For background information on multiplication in finite fields, used for
+ * the matrix operations in the key schedule, see the book _Contemporary
+ * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
+ * Third Edition.
+ *
+ * Only the 128- and 256-bit key sizes are supported. This code is intended
+ * for GNU C on a 32-bit system, but it should work almost anywhere. Loops
+ * are unrolled, precomputation tables are used, etc., for maximum speed at
+ * some cost in memory consumption. */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for memcmp() */
+
+#include "types.h" /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+
+#define TWOFISH_BLOCKSIZE 16
+
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
+# if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
+# define USE_ARM_ASM 1
+# endif
+#endif
+# if defined(__AARCH64EL__)
+# ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+# define USE_ARM_ASM 1
+# endif
+# endif
+
+/* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# if defined(ENABLE_AVX2_SUPPORT)
+# define USE_AVX2 1
+# endif
+#endif
+
+
+/* Prototype for the self-test function. */
+static const char *selftest(void);
+
+
+/* Prototypes for the bulk functions. */
+static void _gcry_twofish_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static void _gcry_twofish_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static void _gcry_twofish_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+static size_t _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
+static size_t _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks);
+
+
+/* Structure for an expanded Twofish key. s contains the key-dependent
+ * S-boxes composed with the MDS matrix; w contains the eight "whitening"
+ * subkeys, K[0] through K[7]. k holds the remaining, "round" subkeys. Note
+ * that k[i] corresponds to what the Twofish paper calls K[i+8]. */
+typedef struct {
+ u32 s[4][256], w[8], k[32];
+
+#ifdef USE_AVX2
+ int use_avx2;
+#endif
+} TWOFISH_context;
+
+
+/* Assembly implementations use the SysV ABI.  On Win64, an ABI conversion
+ * and additional stack space to store XMM6-XMM15 are needed. */
+#undef ASM_FUNC_ABI
+#if defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# else
+# define ASM_FUNC_ABI
+# endif
+#endif
+
+
+/* These two tables are the q0 and q1 permutations, exactly as described in
+ * the Twofish paper. */
+
+static const byte q0[256] = {
+ 0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78,
+ 0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C,
+ 0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30,
+ 0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82,
+ 0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE,
+ 0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B,
+ 0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45,
+ 0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7,
+ 0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF,
+ 0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8,
+ 0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED,
+ 0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90,
+ 0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B,
+ 0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B,
+ 0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F,
+ 0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A,
+ 0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17,
+ 0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72,
+ 0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68,
+ 0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4,
+ 0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42,
+ 0x4A, 0x5E, 0xC1, 0xE0
+};
+
+static const byte q1[256] = {
+ 0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B,
+ 0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1,
+ 0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B,
+ 0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5,
+ 0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54,
+ 0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96,
+ 0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7,
+ 0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8,
+ 0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF,
+ 0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9,
+ 0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D,
+ 0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E,
+ 0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21,
+ 0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01,
+ 0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E,
+ 0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64,
+ 0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44,
+ 0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E,
+ 0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B,
+ 0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9,
+ 0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56,
+ 0x55, 0x09, 0xBE, 0x91
+};
+
+/* These MDS tables are actually tables of MDS composed with q0 and q1,
+ * because it is only ever used that way and we can save some time by
+ * precomputing. Of course the main saving comes from precomputing the
+ * GF(2^8) multiplication involved in the MDS matrix multiply; by looking
+ * things up in these tables we reduce the matrix multiply to four lookups
+ * and three XORs. Semi-formally, the definition of these tables is:
+ * mds[0][i] = MDS (q1[i] 0 0 0)^T mds[1][i] = MDS (0 q0[i] 0 0)^T
+ * mds[2][i] = MDS (0 0 q1[i] 0)^T mds[3][i] = MDS (0 0 0 q0[i])^T
+ * where ^T means "transpose", the matrix multiply is performed in GF(2^8)
+ * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described
+ * by Schneier et al, and I'm casually glossing over the byte/word
+ * conversion issues. */
+
+static const u32 mds[4][256] = {
+ {0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B,
+ 0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B,
+ 0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32,
+ 0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1,
+ 0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA,
+ 0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B,
+ 0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1,
+ 0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5,
+ 0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490,
+ 0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154,
+ 0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0,
+ 0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796,
+ 0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228,
+ 0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7,
+ 0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3,
+ 0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8,
+ 0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477,
+ 0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF,
+ 0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C,
+ 0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9,
+ 0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA,
+ 0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D,
+ 0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72,
+ 0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E,
+ 0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76,
+ 0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321,
+ 0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39,
+ 0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01,
+ 0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D,
+ 0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E,
+ 0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5,
+ 0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64,
+ 0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7,
+ 0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544,
+ 0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E,
+ 0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E,
+ 0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A,
+ 0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B,
+ 0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2,
+ 0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9,
+ 0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504,
+ 0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756,
+ 0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91},
+
+ {0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252,
+ 0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A,
+ 0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020,
+ 0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141,
+ 0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444,
+ 0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424,
+ 0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A,
+ 0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757,
+ 0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383,
+ 0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A,
+ 0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9,
+ 0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656,
+ 0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1,
+ 0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898,
+ 0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414,
+ 0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3,
+ 0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1,
+ 0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989,
+ 0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5,
+ 0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282,
+ 0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E,
+ 0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E,
+ 0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202,
+ 0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC,
+ 0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565,
+ 0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A,
+ 0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808,
+ 0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272,
+ 0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A,
+ 0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969,
+ 0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505,
+ 0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5,
+ 0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D,
+ 0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343,
+ 0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF,
+ 0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3,
+ 0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F,
+ 0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646,
+ 0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6,
+ 0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF,
+ 0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A,
+ 0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7,
+ 0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8},
+
+ {0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B,
+ 0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F,
+ 0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A,
+ 0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783,
+ 0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70,
+ 0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3,
+ 0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB,
+ 0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA,
+ 0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4,
+ 0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41,
+ 0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C,
+ 0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07,
+ 0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622,
+ 0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18,
+ 0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035,
+ 0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96,
+ 0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84,
+ 0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E,
+ 0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F,
+ 0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD,
+ 0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558,
+ 0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40,
+ 0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA,
+ 0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85,
+ 0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF,
+ 0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773,
+ 0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D,
+ 0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B,
+ 0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C,
+ 0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19,
+ 0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086,
+ 0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D,
+ 0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74,
+ 0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755,
+ 0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691,
+ 0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D,
+ 0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4,
+ 0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53,
+ 0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E,
+ 0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9,
+ 0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705,
+ 0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7,
+ 0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF},
+
+ {0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98,
+ 0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866,
+ 0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643,
+ 0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77,
+ 0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9,
+ 0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C,
+ 0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3,
+ 0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216,
+ 0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F,
+ 0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25,
+ 0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF,
+ 0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7,
+ 0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4,
+ 0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E,
+ 0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA,
+ 0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C,
+ 0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12,
+ 0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A,
+ 0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D,
+ 0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE,
+ 0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A,
+ 0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C,
+ 0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B,
+ 0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4,
+ 0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B,
+ 0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3,
+ 0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE,
+ 0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB,
+ 0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85,
+ 0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA,
+ 0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E,
+ 0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8,
+ 0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33,
+ 0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC,
+ 0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718,
+ 0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA,
+ 0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8,
+ 0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872,
+ 0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882,
+ 0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D,
+ 0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10,
+ 0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6,
+ 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8}
+};
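+
+/* Put another way: with the composed tables above, the whole matrix product
+ * MDS * (q1[y0] q0[y1] q1[y2] q0[y3])^T collapses to
+ *
+ * mds[0][y0] ^ mds[1][y1] ^ mds[2][y2] ^ mds[3][y3]
+ *
+ * i.e. four table lookups and three XORs, which is exactly the shape of the
+ * CALC_SB_2 and CALC_K_2 macros further below. */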
+
+/* The exp_to_poly and poly_to_exp tables are used to perform efficient
+ * operations in GF(2^8) represented as GF(2)[x]/w(x) where
+ * w(x)=x^8+x^6+x^3+x^2+1. We care about doing that because it's part of the
+ * definition of the RS matrix in the key schedule. Elements of that field
+ * are polynomials of degree not greater than 7 and all coefficients 0 or 1,
+ * which can be represented naturally by bytes (just substitute x=2). In that
+ * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8)
+ * multiplication is inefficient without hardware support. To multiply
+ * faster, I make use of the fact x is a generator for the nonzero elements,
+ * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for
+ * some n in 0..254. Note that that caret is exponentiation in GF(2^8),
+ * *not* polynomial notation. So if I want to compute pq where p and q are
+ * in GF(2^8), I can just say:
+ * 1. if p=0 or q=0 then pq=0
+ * 2. otherwise, find m and n such that p=x^m and q=x^n
+ * 3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq
+ * The translations in steps 2 and 3 are looked up in the tables
+ * poly_to_exp (for step 2) and exp_to_poly (for step 3). To see this
+ * in action, look at the CALC_S macro. As additional wrinkles, note that
+ * one of my operands is always a constant, so the poly_to_exp lookup on it
+ * is done in advance; I included the original values in the comments so
+ * readers can have some chance of recognizing that this *is* the RS matrix
+ * from the Twofish paper. I've only included the table entries I actually
+ * need; I never do a lookup on a variable input of zero and the biggest
+ * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll
+ * never sum to more than 491. I'm repeating part of the exp_to_poly table
+ * so that I don't have to do mod-255 reduction in the exponent arithmetic.
+ * Since I know my constant operands are never zero, I only have to worry
+ * about zero values in the variable operand, and I do it with a simple
+ * conditional branch. I know conditionals are expensive, but I couldn't
+ * see a non-horrible way of avoiding them, and I did manage to group the
+ * statements so that each if covers four group multiplications. */
+
+static const u16 poly_to_exp[256] = {
+ 492,
+ 0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19,
+ 0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A,
+ 0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C,
+ 0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B,
+ 0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47,
+ 0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D,
+ 0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8,
+ 0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C,
+ 0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83,
+ 0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48,
+ 0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26,
+ 0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E,
+ 0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3,
+ 0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9,
+ 0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A,
+ 0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D,
+ 0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75,
+ 0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84,
+ 0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64,
+ 0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49,
+ 0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF,
+ 0x85, 0xC8, 0xA1
+};
+
+static const byte exp_to_poly[492 + 256] = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2,
+ 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03,
+ 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6,
+ 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A,
+ 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63,
+ 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C,
+ 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07,
+ 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88,
+ 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12,
+ 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7,
+ 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C,
+ 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8,
+ 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25,
+ 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A,
+ 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE,
+ 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC,
+ 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E,
+ 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92,
+ 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89,
+ 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB,
+ 0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1,
+ 0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D,
+ 0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC,
+ 0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3,
+ 0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52,
+ 0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0,
+ 0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1,
+ 0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A,
+ 0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11,
+ 0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51,
+ 0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66,
+ 0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB,
+ 0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19,
+ 0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D,
+ 0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56,
+ 0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE,
+ 0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9,
+ 0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE,
+ 0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41,
+ 0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E,
+ 0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB,
+};
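+
+/* Illustrative sketch, not part of the upstream implementation: a
+ * general-purpose multiply in this representation, spelled out with the two
+ * tables above.  The repeated portion of exp_to_poly only covers the
+ * exponent sums that CALC_S can produce, so a generic helper has to reduce
+ * the sum mod 255 itself, and zero operands (which have no discrete log)
+ * must be handled explicitly. */
+static inline byte
+gf256_mul_example (byte p, byte q)
+{
+ unsigned int e;
+
+ if (p == 0 || q == 0)
+ return 0;
+
+ /* Add the discrete logs and reduce the exponent mod 255. */
+ e = poly_to_exp[p] + poly_to_exp[q];
+ return exp_to_poly[e < 255 ? e : e - 255];
+}
+
+/* Worked example: 0x02 * 0x03 = 0x06, since poly_to_exp[0x02] = 1,
+ * poly_to_exp[0x03] = 23 and exp_to_poly[24] = 0x06, i.e. x*(x+1) = x^2+x.
+ * Note also how CALC_S below gets by without the conditional branch the
+ * comment above mentions: poly_to_exp[0x00] is the dummy exponent 492 and
+ * every exp_to_poly entry from index 492 onwards is zero, so a zero key
+ * byte simply XORs zeros into the S vector. */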
+
+
+/* The table constants are indices of S-box entries, preprocessed through
+ * q0 and q1: entry 2*i is q0[i] and entry 2*i+1 is q1[i]. */
+static byte calc_sb_tbl[512] = {
+ 0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4,
+ 0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8,
+ 0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B,
+ 0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B,
+ 0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD,
+ 0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1,
+ 0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B,
+ 0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F,
+ 0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B,
+ 0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D,
+ 0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E,
+ 0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5,
+ 0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14,
+ 0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3,
+ 0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54,
+ 0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51,
+ 0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A,
+ 0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96,
+ 0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10,
+ 0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C,
+ 0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7,
+ 0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70,
+ 0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB,
+ 0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8,
+ 0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF,
+ 0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC,
+ 0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF,
+ 0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2,
+ 0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82,
+ 0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9,
+ 0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97,
+ 0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17,
+ 0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D,
+ 0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3,
+ 0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C,
+ 0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E,
+ 0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F,
+ 0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49,
+ 0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21,
+ 0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9,
+ 0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD,
+ 0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01,
+ 0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F,
+ 0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48,
+ 0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E,
+ 0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19,
+ 0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57,
+ 0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64,
+ 0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE,
+ 0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5,
+ 0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44,
+ 0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69,
+ 0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15,
+ 0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E,
+ 0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34,
+ 0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC,
+ 0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B,
+ 0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB,
+ 0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52,
+ 0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9,
+ 0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4,
+ 0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2,
+ 0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56,
+ 0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91
+};
+
+/* Macro to perform one column of the RS matrix multiplication. The
+ * parameters a, b, c, and d are the four bytes of output; i is the index
+ * of the key bytes, and w, x, y, and z, are the column of constants from
+ * the RS matrix, preprocessed through the poly_to_exp table. */
+
+#define CALC_S(a, b, c, d, i, w, x, y, z) \
+ { \
+ tmp = poly_to_exp[key[i]]; \
+ (a) ^= exp_to_poly[tmp + (w)]; \
+ (b) ^= exp_to_poly[tmp + (x)]; \
+ (c) ^= exp_to_poly[tmp + (y)]; \
+ (d) ^= exp_to_poly[tmp + (z)]; \
+ }
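+
+/* Example (for illustration): the first CALC_S call in do_twofish_setkey
+ * below passes the constants 0x00, 0x2D, 0x01, 0x2D, which are
+ * poly_to_exp[0x01], poly_to_exp[0xA4], poly_to_exp[0x02] and
+ * poly_to_exp[0xA4], i.e. the exponent form of the RS matrix entries
+ * 01 A4 02 A4 quoted in that line's trailing comment. */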
+
+/* Macros to calculate the key-dependent S-boxes for a 128-bit key using
+ * the S vector from CALC_S. CALC_SB_2 computes a single entry in all
+ * four S-boxes, where i is the index of the entry to compute, and a and b
+ * are the index numbers preprocessed through the q0 and q1 tables
+ * respectively. CALC_SB is simply a convenience to make the code shorter;
+ * it calls CALC_SB_2 four times with consecutive indices from i to i+3,
+ * using the remaining parameters two by two. */
+
+#define CALC_SB_2(i, a, b) \
+ ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \
+ ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \
+ ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \
+ ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh]
+
+#define CALC_SB(i, a, b, c, d, e, f, g, h) \
+ CALC_SB_2 (i, a, b); CALC_SB_2 ((i)+1, c, d); \
+ CALC_SB_2 ((i)+2, e, f); CALC_SB_2 ((i)+3, g, h)
+
+/* Macros exactly like CALC_SB and CALC_SB_2, but for 256-bit keys. */
+
+#define CALC_SB256_2(i, a, b) \
+ ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \
+ ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \
+ ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \
+ ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp];
+
+#define CALC_SB256(i, a, b, c, d, e, f, g, h) \
+ CALC_SB256_2 (i, a, b); CALC_SB256_2 ((i)+1, c, d); \
+ CALC_SB256_2 ((i)+2, e, f); CALC_SB256_2 ((i)+3, g, h)
+
+/* Macros to calculate the whitening and round subkeys. CALC_K_2 computes the
+ * last two stages of the h() function for a given index (either 2i or 2i+1).
+ * a, b, c, and d are the four bytes going into the last two stages. For
+ * 128-bit keys, this is the entire h() function and a and c are the index
+ * preprocessed through q0 and q1 respectively; for longer keys they are the
+ * output of previous stages. j is the index of the first key byte to use.
+ * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2
+ * twice, doing the Pseudo-Hadamard Transform, and doing the necessary
+ * rotations. Its parameters are: a, the array to write the results into,
+ * j, the index of the first output entry, k and l, the preprocessed indices
+ * for index 2i, and m and n, the preprocessed indices for index 2i+1.
+ * CALC_K256_2 expands CALC_K_2 to handle 256-bit keys, by doing two
+ * additional lookup-and-XOR stages. The parameters a and b are the index
+ * preprocessed through q0 and q1 respectively; j is the index of the first
+ * key byte to use. CALC_K256 is identical to CALC_K but for using the
+ * CALC_K256_2 macro instead of CALC_K_2. */
+
+#define CALC_K_2(a, b, c, d, j) \
+ mds[0][q0[a ^ key[(j) + 8]] ^ key[j]] \
+ ^ mds[1][q0[b ^ key[(j) + 9]] ^ key[(j) + 1]] \
+ ^ mds[2][q1[c ^ key[(j) + 10]] ^ key[(j) + 2]] \
+ ^ mds[3][q1[d ^ key[(j) + 11]] ^ key[(j) + 3]]
+
+#define CALC_K(a, j, k, l, m, n) \
+ x = CALC_K_2 (k, l, k, l, 0); \
+ y = CALC_K_2 (m, n, m, n, 4); \
+ y = (y << 8) + (y >> 24); \
+ x += y; y += x; ctx->a[j] = x; \
+ ctx->a[(j) + 1] = (y << 9) + (y >> 23)
+
+#define CALC_K256_2(a, b, j) \
+ CALC_K_2 (q0[q1[b ^ key[(j) + 24]] ^ key[(j) + 16]], \
+ q1[q1[a ^ key[(j) + 25]] ^ key[(j) + 17]], \
+ q0[q0[a ^ key[(j) + 26]] ^ key[(j) + 18]], \
+ q1[q0[b ^ key[(j) + 27]] ^ key[(j) + 19]], j)
+
+#define CALC_K256(a, j, k, l, m, n) \
+ x = CALC_K256_2 (k, l, 0); \
+ y = CALC_K256_2 (m, n, 4); \
+ y = (y << 8) + (y >> 24); \
+ x += y; y += x; ctx->a[j] = x; \
+ ctx->a[(j) + 1] = (y << 9) + (y >> 23)
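+
+/* In CALC_K and CALC_K256, "y = (y << 8) + (y >> 24)" rotates the odd h()
+ * output left by 8 bits, the pair "x += y; y += x" is the pseudo-Hadamard
+ * transform (A + B, A + 2B mod 2^32), and "(y << 9) + (y >> 23)" rotates
+ * the odd-indexed subkey left by 9 bits, matching the Twofish key schedule. */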
+
+
+
+/* Perform the key setup. Note that this works only with 128- and 256-bit
+ * keys, despite the API that looks like it might support other sizes. */
+
+static gcry_err_code_t
+do_twofish_setkey (TWOFISH_context *ctx, const byte *key, const unsigned keylen)
+{
+ int i, j, k;
+
+ /* Temporaries for CALC_K. */
+ u32 x, y;
+
+ /* The S vector used to key the S-boxes, split up into individual bytes.
+ * 128-bit keys use only sa through sh; 256-bit use all of them. */
+ byte sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0;
+ byte si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0;
+
+ /* Temporary for CALC_S. */
+ unsigned int tmp;
+
+ /* Flags for self-test. */
+ static int initialized = 0;
+ static const char *selftest_failed=0;
+
+ /* Check key length; ((keylen - 16) | 16) == 16 holds only for the
+ * supported 16-byte (128-bit) and 32-byte (256-bit) keys. */
+ if( ( ( keylen - 16 ) | 16 ) != 16 )
+ return GPG_ERR_INV_KEYLEN;
+
+ /* Do self-test if necessary. */
+ if (!initialized)
+ {
+ initialized = 1;
+ selftest_failed = selftest ();
+ if( selftest_failed )
+ log_error("%s\n", selftest_failed );
+ }
+ if( selftest_failed )
+ return GPG_ERR_SELFTEST_FAILED;
+
+ /* Compute the first two words of the S vector. The magic numbers are
+ * the entries of the RS matrix, preprocessed through poly_to_exp. The
+ * numbers in the comments are the original (polynomial form) matrix
+ * entries. */
+ CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+ CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+ CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+ CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+ CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+ CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+ CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+ CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+ CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+ CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+ CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+ CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+ CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+ CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+ CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+ CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+
+ if (keylen == 32) /* 256-bit key */
+ {
+ /* Calculate the remaining two words of the S vector */
+ CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+ CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+ CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+ CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+ CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+ CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+ CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+ CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+ CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+ CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+ CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+ CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+ CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+ CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+ CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+ CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+
+ /* Compute the S-boxes. */
+ for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 )
+ {
+ CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
+ }
+
+ /* Calculate whitening and round subkeys. */
+ for (i = 0; i < 8; i += 2)
+ {
+ CALC_K256 ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] );
+ }
+ for (j = 0; j < 32; j += 2, i += 2)
+ {
+ CALC_K256 ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] );
+ }
+ }
+ else
+ {
+ /* Compute the S-boxes. */
+ for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 )
+ {
+ CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
+ }
+
+ /* Calculate whitening and round subkeys. */
+ for (i = 0; i < 8; i += 2)
+ {
+ CALC_K ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] );
+ }
+ for (j = 0; j < 32; j += 2, i += 2)
+ {
+ CALC_K ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] );
+ }
+ }
+
+ return 0;
+}
+
+static gcry_err_code_t
+twofish_setkey (void *context, const byte *key, unsigned int keylen,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ TWOFISH_context *ctx = context;
+ unsigned int hwfeatures = _gcry_get_hw_features ();
+ int rc;
+
+ rc = do_twofish_setkey (ctx, key, keylen);
+
+#ifdef USE_AVX2
+ ctx->use_avx2 = 0;
+ if ((hwfeatures & HWF_INTEL_AVX2) && (hwfeatures & HWF_INTEL_FAST_VPGATHER))
+ {
+ ctx->use_avx2 = 1;
+ }
+#endif
+
+ /* Setup bulk encryption routines. */
+ memset (bulk_ops, 0, sizeof(*bulk_ops));
+ bulk_ops->cbc_dec = _gcry_twofish_cbc_dec;
+ bulk_ops->cfb_dec = _gcry_twofish_cfb_dec;
+ bulk_ops->ctr_enc = _gcry_twofish_ctr_enc;
+ bulk_ops->ocb_crypt = _gcry_twofish_ocb_crypt;
+ bulk_ops->ocb_auth = _gcry_twofish_ocb_auth;
+
+ (void)hwfeatures;
+
+ _gcry_burn_stack (23+6*sizeof(void*));
+ return rc;
+}
+
+
+#ifdef USE_AVX2
+/* Assembler implementations of Twofish using AVX2. Process 16 blocks in
+ * parallel. */
+extern void _gcry_twofish_avx2_ctr_enc(const TWOFISH_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_cbc_dec(const TWOFISH_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_cfb_dec(const TWOFISH_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_ocb_enc(const TWOFISH_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_ocb_dec(const TWOFISH_context *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_ocb_auth(const TWOFISH_context *ctx,
+ const unsigned char *abuf,
+ unsigned char *offset,
+ unsigned char *checksum,
+ const u64 Ls[16]) ASM_FUNC_ABI;
+#endif
+
+
+#ifdef USE_AMD64_ASM
+
+/* Assembly implementations of Twofish. */
+extern void _gcry_twofish_amd64_encrypt_block(const TWOFISH_context *c,
+ byte *out, const byte *in);
+
+extern void _gcry_twofish_amd64_decrypt_block(const TWOFISH_context *c,
+ byte *out, const byte *in);
+
+/* These assembly implementations process three blocks in parallel. */
+extern void _gcry_twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out,
+ const byte *in, byte *ctr);
+
+extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out,
+ const byte *in, byte *iv);
+
+extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out,
+ const byte *in, byte *iv);
+
+extern void _gcry_twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out,
+ const byte *in, byte *offset,
+ byte *checksum, const u64 Ls[3]);
+
+extern void _gcry_twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out,
+ const byte *in, byte *offset,
+ byte *checksum, const u64 Ls[3]);
+
+extern void _gcry_twofish_amd64_ocb_auth(const TWOFISH_context *ctx,
+ const byte *abuf, byte *offset,
+ byte *checksum, const u64 Ls[3]);
+
+static inline void
+twofish_amd64_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
+{
+ _gcry_twofish_amd64_encrypt_block(c, out, in);
+}
+
+static inline void
+twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
+{
+ _gcry_twofish_amd64_decrypt_block(c, out, in);
+}
+
+static inline void
+twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in,
+ byte *ctr)
+{
+ _gcry_twofish_amd64_ctr_enc(c, out, in, ctr);
+}
+
+static inline void
+twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, const byte *in,
+ byte *iv)
+{
+ _gcry_twofish_amd64_cbc_dec(c, out, in, iv);
+}
+
+static inline void
+twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in,
+ byte *iv)
+{
+ _gcry_twofish_amd64_cfb_dec(c, out, in, iv);
+}
+
+static inline void
+twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, const byte *in,
+ byte *offset, byte *checksum, const u64 Ls[3])
+{
+ _gcry_twofish_amd64_ocb_enc(ctx, out, in, offset, checksum, Ls);
+}
+
+static inline void
+twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, const byte *in,
+ byte *offset, byte *checksum, const u64 Ls[3])
+{
+ _gcry_twofish_amd64_ocb_dec(ctx, out, in, offset, checksum, Ls);
+}
+
+static inline void
+twofish_amd64_ocb_auth(const TWOFISH_context *ctx, const byte *abuf,
+ byte *offset, byte *checksum, const u64 Ls[3])
+{
+ _gcry_twofish_amd64_ocb_auth(ctx, abuf, offset, checksum, Ls);
+}
+
+#elif defined(USE_ARM_ASM)
+
+/* Assembly implementations of Twofish. */
+extern void _gcry_twofish_arm_encrypt_block(const TWOFISH_context *c,
+ byte *out, const byte *in);
+
+extern void _gcry_twofish_arm_decrypt_block(const TWOFISH_context *c,
+ byte *out, const byte *in);
+
+#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
+/* Macros to compute the g() function in the encryption and decryption
+ * rounds. G1 is the straight g() function; G2 includes the 8-bit
+ * rotation for the high 32-bit word. */
+
+#define G1(a) \
+ (ctx->s[0][(a) & 0xFF]) ^ (ctx->s[1][((a) >> 8) & 0xFF]) \
+ ^ (ctx->s[2][((a) >> 16) & 0xFF]) ^ (ctx->s[3][(a) >> 24])
+
+#define G2(b) \
+ (ctx->s[1][(b) & 0xFF]) ^ (ctx->s[2][((b) >> 8) & 0xFF]) \
+ ^ (ctx->s[3][((b) >> 16) & 0xFF]) ^ (ctx->s[0][(b) >> 24])
+
+/* Encryption and decryption Feistel rounds. Each one calls the two g()
+ * macros, does the PHT, and performs the XOR and the appropriate bit
+ * rotations. The parameters are the round number (used to select subkeys),
+ * and the four 32-bit chunks of the text. */
+
+#define ENCROUND(n, a, b, c, d) \
+ x = G1 (a); y = G2 (b); \
+ x += y; y += x + ctx->k[2 * (n) + 1]; \
+ (c) ^= x + ctx->k[2 * (n)]; \
+ (c) = ((c) >> 1) + ((c) << 31); \
+ (d) = (((d) << 1)+((d) >> 31)) ^ y
+
+#define DECROUND(n, a, b, c, d) \
+ x = G1 (a); y = G2 (b); \
+ x += y; y += x; \
+ (d) ^= y + ctx->k[2 * (n) + 1]; \
+ (d) = ((d) >> 1) + ((d) << 31); \
+ (c) = (((c) << 1)+((c) >> 31)); \
+ (c) ^= (x + ctx->k[2 * (n)])
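+
+/* The shift-and-add pairs above are 32-bit rotations:
+ * ((c) >> 1) + ((c) << 31) rotates c right by one bit, and
+ * ((d) << 1) + ((d) >> 31) rotates d left by one bit. */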
+
+/* Encryption and decryption cycles; each one is simply two Feistel rounds
+ * with the 32-bit chunks re-ordered to simulate the "swap" */
+
+#define ENCCYCLE(n) \
+ ENCROUND (2 * (n), a, b, c, d); \
+ ENCROUND (2 * (n) + 1, c, d, a, b)
+
+#define DECCYCLE(n) \
+ DECROUND (2 * (n) + 1, c, d, a, b); \
+ DECROUND (2 * (n), a, b, c, d)
+
+/* Macros to convert the input and output bytes into 32-bit words,
+ * and simultaneously perform the whitening step. INPACK packs word
+ * number n into the variable named by x, using whitening subkey number m.
+ * OUTUNPACK unpacks word number n from the variable named by x, using
+ * whitening subkey number m. */
+
+#define INPACK(n, x, m) \
+ x = buf_get_le32(in + (n) * 4); \
+ x ^= ctx->w[m]
+
+#define OUTUNPACK(n, x, m) \
+ x ^= ctx->w[m]; \
+ buf_put_le32(out + (n) * 4, x)
+
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
+
+/* Encrypt one block. in and out may be the same. */
+
+#ifdef USE_AMD64_ASM
+
+static unsigned int
+twofish_encrypt (void *context, byte *out, const byte *in)
+{
+ TWOFISH_context *ctx = context;
+ twofish_amd64_encrypt_block(ctx, out, in);
+ return /*burn_stack*/ (4*sizeof (void*));
+}
+
+#elif defined(USE_ARM_ASM)
+
+static unsigned int
+twofish_encrypt (void *context, byte *out, const byte *in)
+{
+ TWOFISH_context *ctx = context;
+ _gcry_twofish_arm_encrypt_block(ctx, out, in);
+ return /*burn_stack*/ (4*sizeof (void*));
+}
+
+#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
+static void
+do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
+{
+ /* The four 32-bit chunks of the text. */
+ u32 a, b, c, d;
+
+ /* Temporaries used by the round function. */
+ u32 x, y;
+
+ /* Input whitening and packing. */
+ INPACK (0, a, 0);
+ INPACK (1, b, 1);
+ INPACK (2, c, 2);
+ INPACK (3, d, 3);
+
+ /* Encryption Feistel cycles. */
+ ENCCYCLE (0);
+ ENCCYCLE (1);
+ ENCCYCLE (2);
+ ENCCYCLE (3);
+ ENCCYCLE (4);
+ ENCCYCLE (5);
+ ENCCYCLE (6);
+ ENCCYCLE (7);
+
+ /* Output whitening and unpacking. */
+ OUTUNPACK (0, c, 4);
+ OUTUNPACK (1, d, 5);
+ OUTUNPACK (2, a, 6);
+ OUTUNPACK (3, b, 7);
+}
+
+static unsigned int
+twofish_encrypt (void *context, byte *out, const byte *in)
+{
+ TWOFISH_context *ctx = context;
+ do_twofish_encrypt (ctx, out, in);
+ return /*burn_stack*/ (24+3*sizeof (void*));
+}
+
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
+
+/* Decrypt one block. in and out may be the same. */
+
+#ifdef USE_AMD64_ASM
+
+static unsigned int
+twofish_decrypt (void *context, byte *out, const byte *in)
+{
+ TWOFISH_context *ctx = context;
+ twofish_amd64_decrypt_block(ctx, out, in);
+ return /*burn_stack*/ (4*sizeof (void*));
+}
+
+#elif defined(USE_ARM_ASM)
+
+static unsigned int
+twofish_decrypt (void *context, byte *out, const byte *in)
+{
+ TWOFISH_context *ctx = context;
+ _gcry_twofish_arm_decrypt_block(ctx, out, in);
+ return /*burn_stack*/ (4*sizeof (void*));
+}
+
+#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
+static void
+do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
+{
+ /* The four 32-bit chunks of the text. */
+ u32 a, b, c, d;
+
+ /* Temporaries used by the round function. */
+ u32 x, y;
+
+ /* Input whitening and packing. */
+ INPACK (0, c, 4);
+ INPACK (1, d, 5);
+ INPACK (2, a, 6);
+ INPACK (3, b, 7);
+
+ /* Decryption Feistel cycles. */
+ DECCYCLE (7);
+ DECCYCLE (6);
+ DECCYCLE (5);
+ DECCYCLE (4);
+ DECCYCLE (3);
+ DECCYCLE (2);
+ DECCYCLE (1);
+ DECCYCLE (0);
+
+ /* Output whitening and unpacking. */
+ OUTUNPACK (0, a, 0);
+ OUTUNPACK (1, b, 1);
+ OUTUNPACK (2, c, 2);
+ OUTUNPACK (3, d, 3);
+}
+
+static unsigned int
+twofish_decrypt (void *context, byte *out, const byte *in)
+{
+ TWOFISH_context *ctx = context;
+
+ do_twofish_decrypt (ctx, out, in);
+ return /*burn_stack*/ (24+3*sizeof (void*));
+}
+
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
+
+
+/* Bulk encryption of complete blocks in CTR mode. This function is only
+ intended for the bulk encryption feature of cipher.c. CTR is expected to be
+ of size TWOFISH_BLOCKSIZE. */
+static void
+_gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ TWOFISH_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char tmpbuf[TWOFISH_BLOCKSIZE];
+ unsigned int burn, burn_stack_depth = 0;
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ {
+ int did_use_avx2 = 0;
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_twofish_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 16;
+ outbuf += 16 * TWOFISH_BLOCKSIZE;
+ inbuf += 16 * TWOFISH_BLOCKSIZE;
+ did_use_avx2 = 1;
+ }
+
+ if (did_use_avx2)
+ {
+ /* twofish-avx2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+ }
+#endif
+
+#ifdef USE_AMD64_ASM
+ {
+ /* Process data in 3 block chunks. */
+ while (nblocks >= 3)
+ {
+ twofish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 3;
+ outbuf += 3 * TWOFISH_BLOCKSIZE;
+ inbuf += 3 * TWOFISH_BLOCKSIZE;
+
+ burn = 8 * sizeof(void*);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ /* TODO: use caching instead? */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* Encrypt the counter. */
+ burn = twofish_encrypt(ctx, tmpbuf, ctr);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
+
+ /* XOR the input with the encrypted counter and store in output. */
+ cipher_block_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE);
+ outbuf += TWOFISH_BLOCKSIZE;
+ inbuf += TWOFISH_BLOCKSIZE;
+ /* Increment the counter. */
+ cipher_block_add(ctr, 1, TWOFISH_BLOCKSIZE);
+ }
+
+ wipememory(tmpbuf, sizeof(tmpbuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CBC mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ TWOFISH_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char savebuf[TWOFISH_BLOCKSIZE];
+ unsigned int burn, burn_stack_depth = 0;
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ {
+ int did_use_avx2 = 0;
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_twofish_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 16;
+ outbuf += 16 * TWOFISH_BLOCKSIZE;
+ inbuf += 16 * TWOFISH_BLOCKSIZE;
+ did_use_avx2 = 1;
+ }
+
+ if (did_use_avx2)
+ {
+ /* twofish-avx2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+ }
+#endif
+
+#ifdef USE_AMD64_ASM
+ {
+ /* Process data in 3 block chunks. */
+ while (nblocks >= 3)
+ {
+ twofish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 3;
+ outbuf += 3 * TWOFISH_BLOCKSIZE;
+ inbuf += 3 * TWOFISH_BLOCKSIZE;
+
+ burn = 9 * sizeof(void*);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* INBUF is needed later and it may be identical to OUTBUF, so store
+ the intermediate result to SAVEBUF. */
+ burn = twofish_decrypt (ctx, savebuf, inbuf);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
+
+ cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, TWOFISH_BLOCKSIZE);
+ inbuf += TWOFISH_BLOCKSIZE;
+ outbuf += TWOFISH_BLOCKSIZE;
+ }
+
+ wipememory(savebuf, sizeof(savebuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CFB mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ TWOFISH_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned int burn, burn_stack_depth = 0;
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ {
+ int did_use_avx2 = 0;
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_twofish_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 16;
+ outbuf += 16 * TWOFISH_BLOCKSIZE;
+ inbuf += 16 * TWOFISH_BLOCKSIZE;
+ did_use_avx2 = 1;
+ }
+
+ if (did_use_avx2)
+ {
+ /* twofish-avx2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+ }
+#endif
+
+#ifdef USE_AMD64_ASM
+ {
+ /* Process data in 3 block chunks. */
+ while (nblocks >= 3)
+ {
+ twofish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 3;
+ outbuf += 3 * TWOFISH_BLOCKSIZE;
+ inbuf += 3 * TWOFISH_BLOCKSIZE;
+
+ burn = 8 * sizeof(void*);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
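+      /* Encrypt the IV, XOR it with the ciphertext to recover the plaintext,
+         and let this ciphertext block become the new IV.  */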
+ burn = twofish_encrypt(ctx, iv, iv);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
+
+ cipher_block_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE);
+ outbuf += TWOFISH_BLOCKSIZE;
+ inbuf += TWOFISH_BLOCKSIZE;
+ }
+
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk encryption/decryption of complete blocks in OCB mode. */
+static size_t
+_gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+#ifdef USE_AMD64_ASM
+ TWOFISH_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned int burn, burn_stack_depth = 0;
+ u64 blkn = c->u_mode.ocb.data_nblocks;
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ {
+ int did_use_avx2 = 0;
+ u64 Ls[16];
+ unsigned int n = 16 - (blkn % 16);
+ u64 *l;
+ int i;
+
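+      /* Precompute the OCB offset table: block blkn+i uses L[ntz(blkn+i)],
+       * so every slot follows the fixed ntz pattern except the one for the
+       * 16-block boundary, which is refreshed per chunk with ocb_get_l().  */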
+ if (nblocks >= 16)
+ {
+ for (i = 0; i < 16; i += 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ }
+
+ Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ l = &Ls[(15 + n) % 16];
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ blkn += 16;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+ if (encrypt)
+ _gcry_twofish_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+ else
+ _gcry_twofish_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+
+ nblocks -= 16;
+ outbuf += 16 * TWOFISH_BLOCKSIZE;
+ inbuf += 16 * TWOFISH_BLOCKSIZE;
+ did_use_avx2 = 1;
+ }
+ }
+
+ if (did_use_avx2)
+ {
+ /* twofish-avx2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+ }
+#endif
+
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ u64 Ls[3];
+
+ /* Process data in 3 block chunks. */
+ while (nblocks >= 3)
+ {
+ Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
+ Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
+ Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
+ blkn += 3;
+
+ if (encrypt)
+ twofish_amd64_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
+ Ls);
+ else
+ twofish_amd64_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
+ Ls);
+
+ nblocks -= 3;
+ outbuf += 3 * TWOFISH_BLOCKSIZE;
+ inbuf += 3 * TWOFISH_BLOCKSIZE;
+
+ burn = 8 * sizeof(void*);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+
+ c->u_mode.ocb.data_nblocks = blkn;
+
+ if (burn_stack_depth)
+ _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#else
+ (void)c;
+ (void)outbuf_arg;
+ (void)inbuf_arg;
+ (void)encrypt;
+#endif
+
+ return nblocks;
+}
+
+/* Bulk authentication of complete blocks in OCB mode. */
+static size_t
+_gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks)
+{
+#ifdef USE_AMD64_ASM
+ TWOFISH_context *ctx = (void *)&c->context.c;
+ const unsigned char *abuf = abuf_arg;
+ unsigned int burn, burn_stack_depth = 0;
+ u64 blkn = c->u_mode.ocb.aad_nblocks;
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ {
+ int did_use_avx2 = 0;
+ u64 Ls[16];
+ unsigned int n = 16 - (blkn % 16);
+ u64 *l;
+ int i;
+
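+      /* Offset table construction matches _gcry_twofish_ocb_crypt above.  */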
+ if (nblocks >= 16)
+ {
+ for (i = 0; i < 16; i += 8)
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ }
+
+ Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ l = &Ls[(15 + n) % 16];
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ blkn += 16;
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+ _gcry_twofish_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum, Ls);
+
+ nblocks -= 16;
+ abuf += 16 * TWOFISH_BLOCKSIZE;
+ did_use_avx2 = 1;
+ }
+ }
+
+ if (did_use_avx2)
+ {
+ /* twofish-avx2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ {
+ /* Use u64 to store pointers for x32 support (assembly function
+ * assumes 64-bit pointers). */
+ u64 Ls[3];
+
+ /* Process data in 3 block chunks. */
+ while (nblocks >= 3)
+ {
+ Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
+ Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
+ Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
+ blkn += 3;
+
+ twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum, Ls);
+
+ nblocks -= 3;
+ abuf += 3 * TWOFISH_BLOCKSIZE;
+
+ burn = 8 * sizeof(void*);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+
+ c->u_mode.ocb.aad_nblocks = blkn;
+
+ if (burn_stack_depth)
+ _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#else
+ (void)c;
+ (void)abuf_arg;
+#endif
+
+ return nblocks;
+}
+
+
+
+/* Run the self-tests for TWOFISH-CTR, tests IV increment of bulk CTR
+ encryption. Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+ const int nblocks = 16+1;
+ const int blocksize = TWOFISH_BLOCKSIZE;
+ const int context_size = sizeof(TWOFISH_context);
+
+ return _gcry_selftest_helper_ctr("TWOFISH", &twofish_setkey,
+ &twofish_encrypt, nblocks, blocksize, context_size);
+}
+
+/* Run the self-tests for TWOFISH-CBC, tests bulk CBC decryption.
+ Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+ const int nblocks = 16+2;
+ const int blocksize = TWOFISH_BLOCKSIZE;
+ const int context_size = sizeof(TWOFISH_context);
+
+ return _gcry_selftest_helper_cbc("TWOFISH", &twofish_setkey,
+ &twofish_encrypt, nblocks, blocksize, context_size);
+}
+
+/* Run the self-tests for TWOFISH-CFB, tests bulk CFB decryption.
+ Returns NULL on success. */
+static const char *
+selftest_cfb (void)
+{
+ const int nblocks = 16+2;
+ const int blocksize = TWOFISH_BLOCKSIZE;
+ const int context_size = sizeof(TWOFISH_context);
+
+ return _gcry_selftest_helper_cfb("TWOFISH", &twofish_setkey,
+ &twofish_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Test a single encryption and decryption with each key size. */
+
+static const char*
+selftest (void)
+{
+ TWOFISH_context ctx; /* Expanded key. */
+ byte scratch[16]; /* Encryption/decryption result buffer. */
+ cipher_bulk_ops_t bulk_ops;
+ const char *r;
+
+ /* Test vectors for single encryption/decryption. Note that I am using
+ * the vectors from the Twofish paper's "known answer test", I=3 for
+ * 128-bit and I=4 for 256-bit, instead of the all-0 vectors from the
+ * "intermediate value test", because an all-0 key would trigger all the
+ * special cases in the RS matrix multiply, leaving the math untested. */
+ static byte plaintext[16] = {
+ 0xD4, 0x91, 0xDB, 0x16, 0xE7, 0xB1, 0xC3, 0x9E,
+ 0x86, 0xCB, 0x08, 0x6B, 0x78, 0x9F, 0x54, 0x19
+ };
+ static byte key[16] = {
+ 0x9F, 0x58, 0x9F, 0x5C, 0xF6, 0x12, 0x2C, 0x32,
+ 0xB6, 0xBF, 0xEC, 0x2F, 0x2A, 0xE8, 0xC3, 0x5A
+ };
+ static const byte ciphertext[16] = {
+ 0x01, 0x9F, 0x98, 0x09, 0xDE, 0x17, 0x11, 0x85,
+ 0x8F, 0xAA, 0xC3, 0xA3, 0xBA, 0x20, 0xFB, 0xC3
+ };
+ static byte plaintext_256[16] = {
+ 0x90, 0xAF, 0xE9, 0x1B, 0xB2, 0x88, 0x54, 0x4F,
+ 0x2C, 0x32, 0xDC, 0x23, 0x9B, 0x26, 0x35, 0xE6
+ };
+ static byte key_256[32] = {
+ 0xD4, 0x3B, 0xB7, 0x55, 0x6E, 0xA3, 0x2E, 0x46,
+ 0xF2, 0xA2, 0x82, 0xB7, 0xD4, 0x5B, 0x4E, 0x0D,
+ 0x57, 0xFF, 0x73, 0x9D, 0x4D, 0xC9, 0x2C, 0x1B,
+ 0xD7, 0xFC, 0x01, 0x70, 0x0C, 0xC8, 0x21, 0x6F
+ };
+ static const byte ciphertext_256[16] = {
+ 0x6C, 0xB4, 0x56, 0x1C, 0x40, 0xBF, 0x0A, 0x97,
+ 0x05, 0x93, 0x1C, 0xB6, 0xD4, 0x08, 0xE7, 0xFA
+ };
+
+ twofish_setkey (&ctx, key, sizeof(key), &bulk_ops);
+ twofish_encrypt (&ctx, scratch, plaintext);
+ if (memcmp (scratch, ciphertext, sizeof (ciphertext)))
+ return "Twofish-128 test encryption failed.";
+ twofish_decrypt (&ctx, scratch, scratch);
+ if (memcmp (scratch, plaintext, sizeof (plaintext)))
+ return "Twofish-128 test decryption failed.";
+
+ twofish_setkey (&ctx, key_256, sizeof(key_256), &bulk_ops);
+ twofish_encrypt (&ctx, scratch, plaintext_256);
+ if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256)))
+ return "Twofish-256 test encryption failed.";
+ twofish_decrypt (&ctx, scratch, scratch);
+ if (memcmp (scratch, plaintext_256, sizeof (plaintext_256)))
+ return "Twofish-256 test decryption failed.";
+
+ if ((r = selftest_ctr()) != NULL)
+ return r;
+ if ((r = selftest_cbc()) != NULL)
+ return r;
+ if ((r = selftest_cfb()) != NULL)
+ return r;
+
+ return NULL;
+}
+
+/* More complete test program. This does 1000 encryptions and decryptions
+ * with each of 250 128-bit keys and 2000 encryptions and decryptions with
+ * each of 125 256-bit keys, using a feedback scheme similar to a Feistel
+ * cipher, so as to be sure of testing all the table entries pretty
+ * thoroughly. We keep changing the keys so as to get a more meaningful
+ * performance number, since the key setup is non-trivial for Twofish. */
+
+#ifdef TEST
+
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+
+int
+main()
+{
+ TWOFISH_context ctx; /* Expanded key. */
+ int i, j; /* Loop counters. */
+ cipher_bulk_ops_t bulk_ops;
+
+ const char *encrypt_msg; /* Message to print regarding encryption test;
+ * the printf is done outside the loop to avoid
+ * stuffing up the timing. */
+ clock_t timer; /* For computing elapsed time. */
+
+ /* Test buffer. */
+ byte buffer[4][16] = {
+ {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+ 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF},
+ {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78,
+ 0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0},
+ {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
+ 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10},
+ {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10,
+ 0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98}
+ };
+
+ /* Expected outputs for the million-operation test */
+ static const byte test_encrypt[4][16] = {
+ {0xC8, 0x23, 0xB8, 0xB7, 0x6B, 0xFE, 0x91, 0x13,
+ 0x2F, 0xA7, 0x5E, 0xE6, 0x94, 0x77, 0x6F, 0x6B},
+ {0x90, 0x36, 0xD8, 0x29, 0xD5, 0x96, 0xC2, 0x8E,
+ 0xE4, 0xFF, 0x76, 0xBC, 0xE5, 0x77, 0x88, 0x27},
+ {0xB8, 0x78, 0x69, 0xAF, 0x42, 0x8B, 0x48, 0x64,
+ 0xF7, 0xE9, 0xF3, 0x9C, 0x42, 0x18, 0x7B, 0x73},
+ {0x7A, 0x88, 0xFB, 0xEB, 0x90, 0xA4, 0xB4, 0xA8,
+ 0x43, 0xA3, 0x1D, 0xF1, 0x26, 0xC4, 0x53, 0x57}
+ };
+ static const byte test_decrypt[4][16] = {
+ {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+ 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF},
+ {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78,
+ 0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0},
+ {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
+ 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10},
+ {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10,
+ 0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98}
+ };
+
+ /* Start the timer ticking. */
+ timer = clock ();
+
+ /* Encryption test. */
+ for (i = 0; i < 125; i++)
+ {
+ twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), &bulk_ops);
+ for (j = 0; j < 1000; j++)
+ twofish_encrypt (&ctx, buffer[2], buffer[2]);
+ twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), &bulk_ops);
+ for (j = 0; j < 1000; j++)
+ twofish_encrypt (&ctx, buffer[3], buffer[3]);
+ twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, &bulk_ops);
+ for (j = 0; j < 1000; j++) {
+ twofish_encrypt (&ctx, buffer[0], buffer[0]);
+ twofish_encrypt (&ctx, buffer[1], buffer[1]);
+ }
+ }
+ encrypt_msg = memcmp (buffer, test_encrypt, sizeof (test_encrypt)) ?
+ "encryption failure!\n" : "encryption OK!\n";
+
+ /* Decryption test. */
+ for (i = 0; i < 125; i++)
+ {
+ twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, &bulk_ops);
+ for (j = 0; j < 1000; j++) {
+ twofish_decrypt (&ctx, buffer[0], buffer[0]);
+ twofish_decrypt (&ctx, buffer[1], buffer[1]);
+ }
+ twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), &bulk_ops);
+ for (j = 0; j < 1000; j++)
+ twofish_decrypt (&ctx, buffer[3], buffer[3]);
+ twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), &bulk_ops);
+ for (j = 0; j < 1000; j++)
+ twofish_decrypt (&ctx, buffer[2], buffer[2]);
+ }
+
+ /* Stop the timer, and print results. */
+ timer = clock () - timer;
+ printf (encrypt_msg);
+ printf (memcmp (buffer, test_decrypt, sizeof (test_decrypt)) ?
+ "decryption failure!\n" : "decryption OK!\n");
+ printf ("elapsed time: %.1f s.\n", (float) timer / CLOCKS_PER_SEC);
+
+ return 0;
+}
+
+#endif /* TEST */
+
+
+
+gcry_cipher_spec_t _gcry_cipher_spec_twofish =
+ {
+ GCRY_CIPHER_TWOFISH, {0, 0},
+ "TWOFISH", NULL, NULL, 16, 256, sizeof (TWOFISH_context),
+ twofish_setkey, twofish_encrypt, twofish_decrypt
+ };
+
+gcry_cipher_spec_t _gcry_cipher_spec_twofish128 =
+ {
+ GCRY_CIPHER_TWOFISH128, {0, 0},
+ "TWOFISH128", NULL, NULL, 16, 128, sizeof (TWOFISH_context),
+ twofish_setkey, twofish_encrypt, twofish_decrypt
+ };
diff --git a/comm/third_party/libgcrypt/cipher/whirlpool-sse2-amd64.S b/comm/third_party/libgcrypt/cipher/whirlpool-sse2-amd64.S
new file mode 100644
index 0000000000..5631dc567a
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/whirlpool-sse2-amd64.S
@@ -0,0 +1,348 @@
+/* whirlpool-sse2-amd64.S - AMD64 assembly implementation of Whirlpool
+ *
+ * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_WHIRLPOOL)
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* look-up table offsets on RTAB */
+#define RC (0)
+#define C0 (RC + (8 * 10))
+#define C1 (C0 + (8 * 256))
+#define C2 (C1 + (8 * 256))
+#define C3 (C2 + (8 * 256))
+#define C4 (C3 + (8 * 256))
+#define C5 (C4 + (8 * 256))
+#define C6 (C5 + (8 * 256))
+#define C7 (C6 + (8 * 256))
+
+/* stack variables */
+#define STACK_DATAP (0)
+#define STACK_STATEP (STACK_DATAP + 8)
+#define STACK_ROUNDS (STACK_STATEP + 8)
+#define STACK_NBLKS (STACK_ROUNDS + 8)
+#define STACK_RBP (STACK_NBLKS + 8)
+#define STACK_RBX (STACK_RBP + 8)
+#define STACK_R12 (STACK_RBX + 8)
+#define STACK_R13 (STACK_R12 + 8)
+#define STACK_R14 (STACK_R13 + 8)
+#define STACK_R15 (STACK_R14 + 8)
+#define STACK_MAX (STACK_R15 + 8)
+
+/* register macros */
+#define RTAB %rbp
+
+#define RI1 %rax
+#define RI2 %rbx
+#define RI3 %rcx
+#define RI4 %rdx
+
+#define RI1d %eax
+#define RI2d %ebx
+#define RI3d %ecx
+#define RI4d %edx
+
+#define RI1bl %al
+#define RI2bl %bl
+#define RI3bl %cl
+#define RI4bl %dl
+
+#define RI1bh %ah
+#define RI2bh %bh
+#define RI3bh %ch
+#define RI4bh %dh
+
+#define RB0 %r8
+#define RB1 %r9
+#define RB2 %r10
+#define RB3 %r11
+#define RB4 %r12
+#define RB5 %r13
+#define RB6 %r14
+#define RB7 %r15
+
+#define RT0 %rsi
+#define RT1 %rdi
+
+#define RT0d %esi
+#define RT1d %edi
+
+#define XKEY0 %xmm0
+#define XKEY1 %xmm1
+#define XKEY2 %xmm2
+#define XKEY3 %xmm3
+#define XKEY4 %xmm4
+#define XKEY5 %xmm5
+#define XKEY6 %xmm6
+#define XKEY7 %xmm7
+
+#define XSTATE0 %xmm8
+#define XSTATE1 %xmm9
+#define XSTATE2 %xmm10
+#define XSTATE3 %xmm11
+#define XSTATE4 %xmm12
+#define XSTATE5 %xmm13
+#define XSTATE6 %xmm14
+#define XSTATE7 %xmm15
+
+/***********************************************************************
+ * AMD64 assembly implementation of Whirlpool.
+ * - Using table-lookups
+ * - Store state in XMM registers
+ ***********************************************************************/
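+
+/* __do_whirl consumes one 64-bit word held in register RI: each of its
+ * eight bytes indexes one of the lookup tables C0..C7 and the selected
+ * entries are combined via OP (mov or xor) into the accumulator registers
+ * b0..b7, while load_ri() fetches the next input word from load_arg.  */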
+#define __do_whirl(op, ri, \
+ b0, b1, b2, b3, b4, b5, b6, b7, \
+ load_ri, load_arg) \
+ movzbl ri ## bl, RT0d; \
+ movzbl ri ## bh, RT1d; \
+ shrq $16, ri; \
+ op ## q C7(RTAB,RT0,8), b7; \
+ op ## q C6(RTAB,RT1,8), b6; \
+ movzbl ri ## bl, RT0d; \
+ movzbl ri ## bh, RT1d; \
+ shrq $16, ri; \
+ op ## q C5(RTAB,RT0,8), b5; \
+ op ## q C4(RTAB,RT1,8), b4; \
+ movzbl ri ## bl, RT0d; \
+ movzbl ri ## bh, RT1d; \
+ shrl $16, ri ## d; \
+ op ## q C3(RTAB,RT0,8), b3; \
+ op ## q C2(RTAB,RT1,8), b2; \
+ movzbl ri ## bl, RT0d; \
+ movzbl ri ## bh, RT1d; \
+ load_ri( load_arg, ri); \
+ op ## q C1(RTAB,RT0,8), b1; \
+ op ## q C0(RTAB,RT1,8), b0;
+
+#define do_whirl(op, ri, rb_add, load_ri, load_arg) \
+ __do_whirl(op, ##ri, rb_add, load_ri, load_arg)
+
+#define dummy(...) /*_*/
+
+#define do_movq(src, dst) movq src, dst;
+
+#define RB_ADD0 RB0, RB1, RB2, RB3, RB4, RB5, RB6, RB7
+#define RB_ADD1 RB1, RB2, RB3, RB4, RB5, RB6, RB7, RB0
+#define RB_ADD2 RB2, RB3, RB4, RB5, RB6, RB7, RB0, RB1
+#define RB_ADD3 RB3, RB4, RB5, RB6, RB7, RB0, RB1, RB2
+#define RB_ADD4 RB4, RB5, RB6, RB7, RB0, RB1, RB2, RB3
+#define RB_ADD5 RB5, RB6, RB7, RB0, RB1, RB2, RB3, RB4
+#define RB_ADD6 RB6, RB7, RB0, RB1, RB2, RB3, RB4, RB5
+#define RB_ADD7 RB7, RB0, RB1, RB2, RB3, RB4, RB5, RB6
+
+.align 8
+.globl _gcry_whirlpool_transform_amd64
+ELF(.type _gcry_whirlpool_transform_amd64,@function;)
+
+_gcry_whirlpool_transform_amd64:
+ /* input:
+ * %rdi: state
+ * %rsi: inblk
+ * %rdx: nblks
+ * %rcx: look-up tables
+ */
+ CFI_STARTPROC();
+ cmp $0, %rdx;
+ je .Lskip;
+
+ subq $STACK_MAX, %rsp;
+ CFI_ADJUST_CFA_OFFSET(STACK_MAX);
+ movq %rbp, STACK_RBP(%rsp);
+ movq %rbx, STACK_RBX(%rsp);
+ movq %r12, STACK_R12(%rsp);
+ movq %r13, STACK_R13(%rsp);
+ movq %r14, STACK_R14(%rsp);
+ movq %r15, STACK_R15(%rsp);
+ CFI_REL_OFFSET(%rbp, STACK_RBP);
+ CFI_REL_OFFSET(%rbx, STACK_RBX);
+ CFI_REL_OFFSET(%r12, STACK_R12);
+ CFI_REL_OFFSET(%r13, STACK_R13);
+ CFI_REL_OFFSET(%r14, STACK_R14);
+ CFI_REL_OFFSET(%r15, STACK_R15);
+
+ movq %rdx, STACK_NBLKS(%rsp);
+ movq %rdi, STACK_STATEP(%rsp);
+ movq %rsi, STACK_DATAP(%rsp);
+
+ movq %rcx, RTAB;
+
+ jmp .Lfirst_block;
+
+.align 8
+.Lblock_loop:
+ movq STACK_DATAP(%rsp), %rsi;
+ movq RI1, %rdi;
+
+.Lfirst_block:
+ /* load data_block */
+ movq 0*8(%rsi), RB0;
+ movq 1*8(%rsi), RB1;
+ bswapq RB0;
+ movq 2*8(%rsi), RB2;
+ bswapq RB1;
+ movq 3*8(%rsi), RB3;
+ bswapq RB2;
+ movq 4*8(%rsi), RB4;
+ bswapq RB3;
+ movq 5*8(%rsi), RB5;
+ bswapq RB4;
+ movq RB0, XSTATE0;
+ movq 6*8(%rsi), RB6;
+ bswapq RB5;
+ movq RB1, XSTATE1;
+ movq 7*8(%rsi), RB7;
+ bswapq RB6;
+ movq RB2, XSTATE2;
+ bswapq RB7;
+ movq RB3, XSTATE3;
+ movq RB4, XSTATE4;
+ movq RB5, XSTATE5;
+ movq RB6, XSTATE6;
+ movq RB7, XSTATE7;
+
+ /* load key */
+ movq 0*8(%rdi), XKEY0;
+ movq 1*8(%rdi), XKEY1;
+ movq 2*8(%rdi), XKEY2;
+ movq 3*8(%rdi), XKEY3;
+ movq 4*8(%rdi), XKEY4;
+ movq 5*8(%rdi), XKEY5;
+ movq 6*8(%rdi), XKEY6;
+ movq 7*8(%rdi), XKEY7;
+
+ movq XKEY0, RI1;
+ movq XKEY1, RI2;
+ movq XKEY2, RI3;
+ movq XKEY3, RI4;
+
+ /* prepare and store state */
+ pxor XKEY0, XSTATE0;
+ pxor XKEY1, XSTATE1;
+ pxor XKEY2, XSTATE2;
+ pxor XKEY3, XSTATE3;
+ pxor XKEY4, XSTATE4;
+ pxor XKEY5, XSTATE5;
+ pxor XKEY6, XSTATE6;
+ pxor XKEY7, XSTATE7;
+
+ movq XSTATE0, 0*8(%rdi);
+ movq XSTATE1, 1*8(%rdi);
+ movq XSTATE2, 2*8(%rdi);
+ movq XSTATE3, 3*8(%rdi);
+ movq XSTATE4, 4*8(%rdi);
+ movq XSTATE5, 5*8(%rdi);
+ movq XSTATE6, 6*8(%rdi);
+ movq XSTATE7, 7*8(%rdi);
+
+ addq $64, STACK_DATAP(%rsp);
+ movl $(0), STACK_ROUNDS(%rsp);
+.align 8
+.Lround_loop:
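+	/* The first eight do_whirl calls derive the next round key from the
+	 * current one (the round constant is added below); the following
+	 * eight apply the round function to the state, xoring in that key.  */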
+ do_whirl(mov, RI1 /*XKEY0*/, RB_ADD0, do_movq, XKEY4);
+ do_whirl(xor, RI2 /*XKEY1*/, RB_ADD1, do_movq, XKEY5);
+ do_whirl(xor, RI3 /*XKEY2*/, RB_ADD2, do_movq, XKEY6);
+ do_whirl(xor, RI4 /*XKEY3*/, RB_ADD3, do_movq, XKEY7);
+ do_whirl(xor, RI1 /*XKEY0*/, RB_ADD4, do_movq, XSTATE0);
+ do_whirl(xor, RI2 /*XKEY1*/, RB_ADD5, do_movq, XSTATE1);
+ do_whirl(xor, RI3 /*XKEY2*/, RB_ADD6, do_movq, XSTATE2);
+ do_whirl(xor, RI4 /*XKEY3*/, RB_ADD7, do_movq, XSTATE3);
+
+ movl STACK_ROUNDS(%rsp), RT0d;
+ movq RB1, XKEY1;
+ addl $1, STACK_ROUNDS(%rsp);
+ movq RB2, XKEY2;
+ movq RB3, XKEY3;
+ xorq RC(RTAB,RT0,8), RB0; /* Add round constant */
+ movq RB4, XKEY4;
+ movq RB5, XKEY5;
+ movq RB0, XKEY0;
+ movq RB6, XKEY6;
+ movq RB7, XKEY7;
+
+ do_whirl(xor, RI1 /*XSTATE0*/, RB_ADD0, do_movq, XSTATE4);
+ do_whirl(xor, RI2 /*XSTATE1*/, RB_ADD1, do_movq, XSTATE5);
+ do_whirl(xor, RI3 /*XSTATE2*/, RB_ADD2, do_movq, XSTATE6);
+ do_whirl(xor, RI4 /*XSTATE3*/, RB_ADD3, do_movq, XSTATE7);
+
+ cmpl $10, STACK_ROUNDS(%rsp);
+ je .Lis_last_round;
+
+ do_whirl(xor, RI1 /*XSTATE4*/, RB_ADD4, do_movq, XKEY0);
+ do_whirl(xor, RI2 /*XSTATE5*/, RB_ADD5, do_movq, XKEY1);
+ do_whirl(xor, RI3 /*XSTATE6*/, RB_ADD6, do_movq, XKEY2);
+ do_whirl(xor, RI4 /*XSTATE7*/, RB_ADD7, do_movq, XKEY3);
+ movq RB0, XSTATE0;
+ movq RB1, XSTATE1;
+ movq RB2, XSTATE2;
+ movq RB3, XSTATE3;
+ movq RB4, XSTATE4;
+ movq RB5, XSTATE5;
+ movq RB6, XSTATE6;
+ movq RB7, XSTATE7;
+
+ jmp .Lround_loop;
+.align 8
+.Lis_last_round:
+ do_whirl(xor, RI1 /*XSTATE4*/, RB_ADD4, dummy, _);
+ movq STACK_STATEP(%rsp), RI1;
+ do_whirl(xor, RI2 /*XSTATE5*/, RB_ADD5, dummy, _);
+ do_whirl(xor, RI3 /*XSTATE6*/, RB_ADD6, dummy, _);
+ do_whirl(xor, RI4 /*XSTATE7*/, RB_ADD7, dummy, _);
+
+ /* store state */
+ xorq RB0, 0*8(RI1);
+ xorq RB1, 1*8(RI1);
+ xorq RB2, 2*8(RI1);
+ xorq RB3, 3*8(RI1);
+ xorq RB4, 4*8(RI1);
+ xorq RB5, 5*8(RI1);
+ xorq RB6, 6*8(RI1);
+ xorq RB7, 7*8(RI1);
+
+ subq $1, STACK_NBLKS(%rsp);
+ jnz .Lblock_loop;
+
+ movq STACK_RBP(%rsp), %rbp;
+ movq STACK_RBX(%rsp), %rbx;
+ movq STACK_R12(%rsp), %r12;
+ movq STACK_R13(%rsp), %r13;
+ movq STACK_R14(%rsp), %r14;
+ movq STACK_R15(%rsp), %r15;
+ CFI_RESTORE(%rbp);
+ CFI_RESTORE(%rbx);
+ CFI_RESTORE(%r12);
+ CFI_RESTORE(%r13);
+ CFI_RESTORE(%r14);
+ CFI_RESTORE(%r15);
+ addq $STACK_MAX, %rsp;
+ CFI_ADJUST_CFA_OFFSET(-STACK_MAX);
+.Lskip:
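+	/* Return stack depth used (for burn_stack). */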
+ movl $(STACK_MAX + 8), %eax;
+ ret;
+ CFI_ENDPROC();
+ELF(.size _gcry_whirlpool_transform_amd64,.-_gcry_whirlpool_transform_amd64;)
+
+#endif
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/whirlpool.c b/comm/third_party/libgcrypt/cipher/whirlpool.c
new file mode 100644
index 0000000000..79b2026b57
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/whirlpool.c
@@ -0,0 +1,1535 @@
+/* whirlpool.c - Whirlpool hashing algorithm
+ * Copyright (C) 2005 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* This is an implementation of the Whirlpool hashing algorithm, which
+   has been developed by Vincent Rijmen and Paulo S. L. M. Barreto;
+   its homepage is located at:
+   http://www.larc.usp.br/~pbarreto/WhirlpoolPage.html
+
+   The S-Boxes and the structure of the main transformation function,
+   which implements an optimized version of the algorithm, are taken
+   from the reference implementation available from
+   http://www.larc.usp.br/~pbarreto/whirlpool.zip
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+
+#include "bufhelp.h"
+#include "hash-common.h"
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+
+
+/* Size of a whirlpool block (in bytes). */
+#define BLOCK_SIZE 64
+
+/* Number of rounds. */
+#define R 10
+
+
+
+/* Types. */
+typedef u64 whirlpool_block_t[BLOCK_SIZE / 8];
+
+typedef struct {
+ gcry_md_block_ctx_t bctx;
+ whirlpool_block_t hash_state;
+ int use_bugemu;
+ struct {
+ size_t count;
+ unsigned char length[32];
+ } bugemu;
+} whirlpool_context_t;
+
+
+
+/* Macros. */
+
+/* Convert the buffer BUFFER into a block BLOCK, using I as
+ counter. */
+#define buffer_to_block(buffer, block, i) \
+ for (i = 0; i < 8; i++) \
+ (block)[i] = buf_get_be64((buffer) + i * 8);
+
+/* Convert the block BLOCK into a buffer BUFFER, using I as
+ counter. */
+#define block_to_buffer(buffer, block, i) \
+ for (i = 0; i < 8; i++) \
+ buf_put_be64((buffer) + i * 8, (block)[i]);
+
+/* Copy the block BLOCK_SRC to BLOCK_DST, using I as counter. */
+#define block_copy(block_dst, block_src, i) \
+ for (i = 0; i < 8; i++) \
+ block_dst[i] = block_src[i];
+
+/* XOR the block BLOCK_SRC into BLOCK_DST, using I as counter. */
+#define block_xor(block_dst, block_src, i) \
+ for (i = 0; i < 8; i++) \
+ block_dst[i] ^= block_src[i];
+
+
+
+
+struct whirlpool_tables_s {
+ u64 RC[R];
+ u64 C[8][256];
+};
+
+static const struct whirlpool_tables_s tab =
+{
+/* Round constants. */
+ {
+ U64_C (0x1823c6e887b8014f),
+ U64_C (0x36a6d2f5796f9152),
+ U64_C (0x60bc9b8ea30c7b35),
+ U64_C (0x1de0d7c22e4bfe57),
+ U64_C (0x157737e59ff04ada),
+ U64_C (0x58c9290ab1a06b85),
+ U64_C (0xbd5d10f4cb3e0567),
+ U64_C (0xe427418ba77d95d8),
+ U64_C (0xfbee7c66dd17479e),
+ U64_C (0xca2dbf07ad5a8333),
+ },
+/* Main lookup boxes. */
+ { {
+ U64_C (0x18186018c07830d8), U64_C (0x23238c2305af4626),
+ U64_C (0xc6c63fc67ef991b8), U64_C (0xe8e887e8136fcdfb),
+ U64_C (0x878726874ca113cb), U64_C (0xb8b8dab8a9626d11),
+ U64_C (0x0101040108050209), U64_C (0x4f4f214f426e9e0d),
+ U64_C (0x3636d836adee6c9b), U64_C (0xa6a6a2a6590451ff),
+ U64_C (0xd2d26fd2debdb90c), U64_C (0xf5f5f3f5fb06f70e),
+ U64_C (0x7979f979ef80f296), U64_C (0x6f6fa16f5fcede30),
+ U64_C (0x91917e91fcef3f6d), U64_C (0x52525552aa07a4f8),
+ U64_C (0x60609d6027fdc047), U64_C (0xbcbccabc89766535),
+ U64_C (0x9b9b569baccd2b37), U64_C (0x8e8e028e048c018a),
+ U64_C (0xa3a3b6a371155bd2), U64_C (0x0c0c300c603c186c),
+ U64_C (0x7b7bf17bff8af684), U64_C (0x3535d435b5e16a80),
+ U64_C (0x1d1d741de8693af5), U64_C (0xe0e0a7e05347ddb3),
+ U64_C (0xd7d77bd7f6acb321), U64_C (0xc2c22fc25eed999c),
+ U64_C (0x2e2eb82e6d965c43), U64_C (0x4b4b314b627a9629),
+ U64_C (0xfefedffea321e15d), U64_C (0x575741578216aed5),
+ U64_C (0x15155415a8412abd), U64_C (0x7777c1779fb6eee8),
+ U64_C (0x3737dc37a5eb6e92), U64_C (0xe5e5b3e57b56d79e),
+ U64_C (0x9f9f469f8cd92313), U64_C (0xf0f0e7f0d317fd23),
+ U64_C (0x4a4a354a6a7f9420), U64_C (0xdada4fda9e95a944),
+ U64_C (0x58587d58fa25b0a2), U64_C (0xc9c903c906ca8fcf),
+ U64_C (0x2929a429558d527c), U64_C (0x0a0a280a5022145a),
+ U64_C (0xb1b1feb1e14f7f50), U64_C (0xa0a0baa0691a5dc9),
+ U64_C (0x6b6bb16b7fdad614), U64_C (0x85852e855cab17d9),
+ U64_C (0xbdbdcebd8173673c), U64_C (0x5d5d695dd234ba8f),
+ U64_C (0x1010401080502090), U64_C (0xf4f4f7f4f303f507),
+ U64_C (0xcbcb0bcb16c08bdd), U64_C (0x3e3ef83eedc67cd3),
+ U64_C (0x0505140528110a2d), U64_C (0x676781671fe6ce78),
+ U64_C (0xe4e4b7e47353d597), U64_C (0x27279c2725bb4e02),
+ U64_C (0x4141194132588273), U64_C (0x8b8b168b2c9d0ba7),
+ U64_C (0xa7a7a6a7510153f6), U64_C (0x7d7de97dcf94fab2),
+ U64_C (0x95956e95dcfb3749), U64_C (0xd8d847d88e9fad56),
+ U64_C (0xfbfbcbfb8b30eb70), U64_C (0xeeee9fee2371c1cd),
+ U64_C (0x7c7ced7cc791f8bb), U64_C (0x6666856617e3cc71),
+ U64_C (0xdddd53dda68ea77b), U64_C (0x17175c17b84b2eaf),
+ U64_C (0x4747014702468e45), U64_C (0x9e9e429e84dc211a),
+ U64_C (0xcaca0fca1ec589d4), U64_C (0x2d2db42d75995a58),
+ U64_C (0xbfbfc6bf9179632e), U64_C (0x07071c07381b0e3f),
+ U64_C (0xadad8ead012347ac), U64_C (0x5a5a755aea2fb4b0),
+ U64_C (0x838336836cb51bef), U64_C (0x3333cc3385ff66b6),
+ U64_C (0x636391633ff2c65c), U64_C (0x02020802100a0412),
+ U64_C (0xaaaa92aa39384993), U64_C (0x7171d971afa8e2de),
+ U64_C (0xc8c807c80ecf8dc6), U64_C (0x19196419c87d32d1),
+ U64_C (0x494939497270923b), U64_C (0xd9d943d9869aaf5f),
+ U64_C (0xf2f2eff2c31df931), U64_C (0xe3e3abe34b48dba8),
+ U64_C (0x5b5b715be22ab6b9), U64_C (0x88881a8834920dbc),
+ U64_C (0x9a9a529aa4c8293e), U64_C (0x262698262dbe4c0b),
+ U64_C (0x3232c8328dfa64bf), U64_C (0xb0b0fab0e94a7d59),
+ U64_C (0xe9e983e91b6acff2), U64_C (0x0f0f3c0f78331e77),
+ U64_C (0xd5d573d5e6a6b733), U64_C (0x80803a8074ba1df4),
+ U64_C (0xbebec2be997c6127), U64_C (0xcdcd13cd26de87eb),
+ U64_C (0x3434d034bde46889), U64_C (0x48483d487a759032),
+ U64_C (0xffffdbffab24e354), U64_C (0x7a7af57af78ff48d),
+ U64_C (0x90907a90f4ea3d64), U64_C (0x5f5f615fc23ebe9d),
+ U64_C (0x202080201da0403d), U64_C (0x6868bd6867d5d00f),
+ U64_C (0x1a1a681ad07234ca), U64_C (0xaeae82ae192c41b7),
+ U64_C (0xb4b4eab4c95e757d), U64_C (0x54544d549a19a8ce),
+ U64_C (0x93937693ece53b7f), U64_C (0x222288220daa442f),
+ U64_C (0x64648d6407e9c863), U64_C (0xf1f1e3f1db12ff2a),
+ U64_C (0x7373d173bfa2e6cc), U64_C (0x12124812905a2482),
+ U64_C (0x40401d403a5d807a), U64_C (0x0808200840281048),
+ U64_C (0xc3c32bc356e89b95), U64_C (0xecec97ec337bc5df),
+ U64_C (0xdbdb4bdb9690ab4d), U64_C (0xa1a1bea1611f5fc0),
+ U64_C (0x8d8d0e8d1c830791), U64_C (0x3d3df43df5c97ac8),
+ U64_C (0x97976697ccf1335b), U64_C (0x0000000000000000),
+ U64_C (0xcfcf1bcf36d483f9), U64_C (0x2b2bac2b4587566e),
+ U64_C (0x7676c57697b3ece1), U64_C (0x8282328264b019e6),
+ U64_C (0xd6d67fd6fea9b128), U64_C (0x1b1b6c1bd87736c3),
+ U64_C (0xb5b5eeb5c15b7774), U64_C (0xafaf86af112943be),
+ U64_C (0x6a6ab56a77dfd41d), U64_C (0x50505d50ba0da0ea),
+ U64_C (0x45450945124c8a57), U64_C (0xf3f3ebf3cb18fb38),
+ U64_C (0x3030c0309df060ad), U64_C (0xefef9bef2b74c3c4),
+ U64_C (0x3f3ffc3fe5c37eda), U64_C (0x55554955921caac7),
+ U64_C (0xa2a2b2a2791059db), U64_C (0xeaea8fea0365c9e9),
+ U64_C (0x656589650fecca6a), U64_C (0xbabad2bab9686903),
+ U64_C (0x2f2fbc2f65935e4a), U64_C (0xc0c027c04ee79d8e),
+ U64_C (0xdede5fdebe81a160), U64_C (0x1c1c701ce06c38fc),
+ U64_C (0xfdfdd3fdbb2ee746), U64_C (0x4d4d294d52649a1f),
+ U64_C (0x92927292e4e03976), U64_C (0x7575c9758fbceafa),
+ U64_C (0x06061806301e0c36), U64_C (0x8a8a128a249809ae),
+ U64_C (0xb2b2f2b2f940794b), U64_C (0xe6e6bfe66359d185),
+ U64_C (0x0e0e380e70361c7e), U64_C (0x1f1f7c1ff8633ee7),
+ U64_C (0x6262956237f7c455), U64_C (0xd4d477d4eea3b53a),
+ U64_C (0xa8a89aa829324d81), U64_C (0x96966296c4f43152),
+ U64_C (0xf9f9c3f99b3aef62), U64_C (0xc5c533c566f697a3),
+ U64_C (0x2525942535b14a10), U64_C (0x59597959f220b2ab),
+ U64_C (0x84842a8454ae15d0), U64_C (0x7272d572b7a7e4c5),
+ U64_C (0x3939e439d5dd72ec), U64_C (0x4c4c2d4c5a619816),
+ U64_C (0x5e5e655eca3bbc94), U64_C (0x7878fd78e785f09f),
+ U64_C (0x3838e038ddd870e5), U64_C (0x8c8c0a8c14860598),
+ U64_C (0xd1d163d1c6b2bf17), U64_C (0xa5a5aea5410b57e4),
+ U64_C (0xe2e2afe2434dd9a1), U64_C (0x616199612ff8c24e),
+ U64_C (0xb3b3f6b3f1457b42), U64_C (0x2121842115a54234),
+ U64_C (0x9c9c4a9c94d62508), U64_C (0x1e1e781ef0663cee),
+ U64_C (0x4343114322528661), U64_C (0xc7c73bc776fc93b1),
+ U64_C (0xfcfcd7fcb32be54f), U64_C (0x0404100420140824),
+ U64_C (0x51515951b208a2e3), U64_C (0x99995e99bcc72f25),
+ U64_C (0x6d6da96d4fc4da22), U64_C (0x0d0d340d68391a65),
+ U64_C (0xfafacffa8335e979), U64_C (0xdfdf5bdfb684a369),
+ U64_C (0x7e7ee57ed79bfca9), U64_C (0x242490243db44819),
+ U64_C (0x3b3bec3bc5d776fe), U64_C (0xabab96ab313d4b9a),
+ U64_C (0xcece1fce3ed181f0), U64_C (0x1111441188552299),
+ U64_C (0x8f8f068f0c890383), U64_C (0x4e4e254e4a6b9c04),
+ U64_C (0xb7b7e6b7d1517366), U64_C (0xebeb8beb0b60cbe0),
+ U64_C (0x3c3cf03cfdcc78c1), U64_C (0x81813e817cbf1ffd),
+ U64_C (0x94946a94d4fe3540), U64_C (0xf7f7fbf7eb0cf31c),
+ U64_C (0xb9b9deb9a1676f18), U64_C (0x13134c13985f268b),
+ U64_C (0x2c2cb02c7d9c5851), U64_C (0xd3d36bd3d6b8bb05),
+ U64_C (0xe7e7bbe76b5cd38c), U64_C (0x6e6ea56e57cbdc39),
+ U64_C (0xc4c437c46ef395aa), U64_C (0x03030c03180f061b),
+ U64_C (0x565645568a13acdc), U64_C (0x44440d441a49885e),
+ U64_C (0x7f7fe17fdf9efea0), U64_C (0xa9a99ea921374f88),
+ U64_C (0x2a2aa82a4d825467), U64_C (0xbbbbd6bbb16d6b0a),
+ U64_C (0xc1c123c146e29f87), U64_C (0x53535153a202a6f1),
+ U64_C (0xdcdc57dcae8ba572), U64_C (0x0b0b2c0b58271653),
+ U64_C (0x9d9d4e9d9cd32701), U64_C (0x6c6cad6c47c1d82b),
+ U64_C (0x3131c43195f562a4), U64_C (0x7474cd7487b9e8f3),
+ U64_C (0xf6f6fff6e309f115), U64_C (0x464605460a438c4c),
+ U64_C (0xacac8aac092645a5), U64_C (0x89891e893c970fb5),
+ U64_C (0x14145014a04428b4), U64_C (0xe1e1a3e15b42dfba),
+ U64_C (0x16165816b04e2ca6), U64_C (0x3a3ae83acdd274f7),
+ U64_C (0x6969b9696fd0d206), U64_C (0x09092409482d1241),
+ U64_C (0x7070dd70a7ade0d7), U64_C (0xb6b6e2b6d954716f),
+ U64_C (0xd0d067d0ceb7bd1e), U64_C (0xeded93ed3b7ec7d6),
+ U64_C (0xcccc17cc2edb85e2), U64_C (0x424215422a578468),
+ U64_C (0x98985a98b4c22d2c), U64_C (0xa4a4aaa4490e55ed),
+ U64_C (0x2828a0285d885075), U64_C (0x5c5c6d5cda31b886),
+ U64_C (0xf8f8c7f8933fed6b), U64_C (0x8686228644a411c2),
+ }, {
+ U64_C (0xd818186018c07830), U64_C (0x2623238c2305af46),
+ U64_C (0xb8c6c63fc67ef991), U64_C (0xfbe8e887e8136fcd),
+ U64_C (0xcb878726874ca113), U64_C (0x11b8b8dab8a9626d),
+ U64_C (0x0901010401080502), U64_C (0x0d4f4f214f426e9e),
+ U64_C (0x9b3636d836adee6c), U64_C (0xffa6a6a2a6590451),
+ U64_C (0x0cd2d26fd2debdb9), U64_C (0x0ef5f5f3f5fb06f7),
+ U64_C (0x967979f979ef80f2), U64_C (0x306f6fa16f5fcede),
+ U64_C (0x6d91917e91fcef3f), U64_C (0xf852525552aa07a4),
+ U64_C (0x4760609d6027fdc0), U64_C (0x35bcbccabc897665),
+ U64_C (0x379b9b569baccd2b), U64_C (0x8a8e8e028e048c01),
+ U64_C (0xd2a3a3b6a371155b), U64_C (0x6c0c0c300c603c18),
+ U64_C (0x847b7bf17bff8af6), U64_C (0x803535d435b5e16a),
+ U64_C (0xf51d1d741de8693a), U64_C (0xb3e0e0a7e05347dd),
+ U64_C (0x21d7d77bd7f6acb3), U64_C (0x9cc2c22fc25eed99),
+ U64_C (0x432e2eb82e6d965c), U64_C (0x294b4b314b627a96),
+ U64_C (0x5dfefedffea321e1), U64_C (0xd5575741578216ae),
+ U64_C (0xbd15155415a8412a), U64_C (0xe87777c1779fb6ee),
+ U64_C (0x923737dc37a5eb6e), U64_C (0x9ee5e5b3e57b56d7),
+ U64_C (0x139f9f469f8cd923), U64_C (0x23f0f0e7f0d317fd),
+ U64_C (0x204a4a354a6a7f94), U64_C (0x44dada4fda9e95a9),
+ U64_C (0xa258587d58fa25b0), U64_C (0xcfc9c903c906ca8f),
+ U64_C (0x7c2929a429558d52), U64_C (0x5a0a0a280a502214),
+ U64_C (0x50b1b1feb1e14f7f), U64_C (0xc9a0a0baa0691a5d),
+ U64_C (0x146b6bb16b7fdad6), U64_C (0xd985852e855cab17),
+ U64_C (0x3cbdbdcebd817367), U64_C (0x8f5d5d695dd234ba),
+ U64_C (0x9010104010805020), U64_C (0x07f4f4f7f4f303f5),
+ U64_C (0xddcbcb0bcb16c08b), U64_C (0xd33e3ef83eedc67c),
+ U64_C (0x2d0505140528110a), U64_C (0x78676781671fe6ce),
+ U64_C (0x97e4e4b7e47353d5), U64_C (0x0227279c2725bb4e),
+ U64_C (0x7341411941325882), U64_C (0xa78b8b168b2c9d0b),
+ U64_C (0xf6a7a7a6a7510153), U64_C (0xb27d7de97dcf94fa),
+ U64_C (0x4995956e95dcfb37), U64_C (0x56d8d847d88e9fad),
+ U64_C (0x70fbfbcbfb8b30eb), U64_C (0xcdeeee9fee2371c1),
+ U64_C (0xbb7c7ced7cc791f8), U64_C (0x716666856617e3cc),
+ U64_C (0x7bdddd53dda68ea7), U64_C (0xaf17175c17b84b2e),
+ U64_C (0x454747014702468e), U64_C (0x1a9e9e429e84dc21),
+ U64_C (0xd4caca0fca1ec589), U64_C (0x582d2db42d75995a),
+ U64_C (0x2ebfbfc6bf917963), U64_C (0x3f07071c07381b0e),
+ U64_C (0xacadad8ead012347), U64_C (0xb05a5a755aea2fb4),
+ U64_C (0xef838336836cb51b), U64_C (0xb63333cc3385ff66),
+ U64_C (0x5c636391633ff2c6), U64_C (0x1202020802100a04),
+ U64_C (0x93aaaa92aa393849), U64_C (0xde7171d971afa8e2),
+ U64_C (0xc6c8c807c80ecf8d), U64_C (0xd119196419c87d32),
+ U64_C (0x3b49493949727092), U64_C (0x5fd9d943d9869aaf),
+ U64_C (0x31f2f2eff2c31df9), U64_C (0xa8e3e3abe34b48db),
+ U64_C (0xb95b5b715be22ab6), U64_C (0xbc88881a8834920d),
+ U64_C (0x3e9a9a529aa4c829), U64_C (0x0b262698262dbe4c),
+ U64_C (0xbf3232c8328dfa64), U64_C (0x59b0b0fab0e94a7d),
+ U64_C (0xf2e9e983e91b6acf), U64_C (0x770f0f3c0f78331e),
+ U64_C (0x33d5d573d5e6a6b7), U64_C (0xf480803a8074ba1d),
+ U64_C (0x27bebec2be997c61), U64_C (0xebcdcd13cd26de87),
+ U64_C (0x893434d034bde468), U64_C (0x3248483d487a7590),
+ U64_C (0x54ffffdbffab24e3), U64_C (0x8d7a7af57af78ff4),
+ U64_C (0x6490907a90f4ea3d), U64_C (0x9d5f5f615fc23ebe),
+ U64_C (0x3d202080201da040), U64_C (0x0f6868bd6867d5d0),
+ U64_C (0xca1a1a681ad07234), U64_C (0xb7aeae82ae192c41),
+ U64_C (0x7db4b4eab4c95e75), U64_C (0xce54544d549a19a8),
+ U64_C (0x7f93937693ece53b), U64_C (0x2f222288220daa44),
+ U64_C (0x6364648d6407e9c8), U64_C (0x2af1f1e3f1db12ff),
+ U64_C (0xcc7373d173bfa2e6), U64_C (0x8212124812905a24),
+ U64_C (0x7a40401d403a5d80), U64_C (0x4808082008402810),
+ U64_C (0x95c3c32bc356e89b), U64_C (0xdfecec97ec337bc5),
+ U64_C (0x4ddbdb4bdb9690ab), U64_C (0xc0a1a1bea1611f5f),
+ U64_C (0x918d8d0e8d1c8307), U64_C (0xc83d3df43df5c97a),
+ U64_C (0x5b97976697ccf133), U64_C (0x0000000000000000),
+ U64_C (0xf9cfcf1bcf36d483), U64_C (0x6e2b2bac2b458756),
+ U64_C (0xe17676c57697b3ec), U64_C (0xe68282328264b019),
+ U64_C (0x28d6d67fd6fea9b1), U64_C (0xc31b1b6c1bd87736),
+ U64_C (0x74b5b5eeb5c15b77), U64_C (0xbeafaf86af112943),
+ U64_C (0x1d6a6ab56a77dfd4), U64_C (0xea50505d50ba0da0),
+ U64_C (0x5745450945124c8a), U64_C (0x38f3f3ebf3cb18fb),
+ U64_C (0xad3030c0309df060), U64_C (0xc4efef9bef2b74c3),
+ U64_C (0xda3f3ffc3fe5c37e), U64_C (0xc755554955921caa),
+ U64_C (0xdba2a2b2a2791059), U64_C (0xe9eaea8fea0365c9),
+ U64_C (0x6a656589650fecca), U64_C (0x03babad2bab96869),
+ U64_C (0x4a2f2fbc2f65935e), U64_C (0x8ec0c027c04ee79d),
+ U64_C (0x60dede5fdebe81a1), U64_C (0xfc1c1c701ce06c38),
+ U64_C (0x46fdfdd3fdbb2ee7), U64_C (0x1f4d4d294d52649a),
+ U64_C (0x7692927292e4e039), U64_C (0xfa7575c9758fbcea),
+ U64_C (0x3606061806301e0c), U64_C (0xae8a8a128a249809),
+ U64_C (0x4bb2b2f2b2f94079), U64_C (0x85e6e6bfe66359d1),
+ U64_C (0x7e0e0e380e70361c), U64_C (0xe71f1f7c1ff8633e),
+ U64_C (0x556262956237f7c4), U64_C (0x3ad4d477d4eea3b5),
+ U64_C (0x81a8a89aa829324d), U64_C (0x5296966296c4f431),
+ U64_C (0x62f9f9c3f99b3aef), U64_C (0xa3c5c533c566f697),
+ U64_C (0x102525942535b14a), U64_C (0xab59597959f220b2),
+ U64_C (0xd084842a8454ae15), U64_C (0xc57272d572b7a7e4),
+ U64_C (0xec3939e439d5dd72), U64_C (0x164c4c2d4c5a6198),
+ U64_C (0x945e5e655eca3bbc), U64_C (0x9f7878fd78e785f0),
+ U64_C (0xe53838e038ddd870), U64_C (0x988c8c0a8c148605),
+ U64_C (0x17d1d163d1c6b2bf), U64_C (0xe4a5a5aea5410b57),
+ U64_C (0xa1e2e2afe2434dd9), U64_C (0x4e616199612ff8c2),
+ U64_C (0x42b3b3f6b3f1457b), U64_C (0x342121842115a542),
+ U64_C (0x089c9c4a9c94d625), U64_C (0xee1e1e781ef0663c),
+ U64_C (0x6143431143225286), U64_C (0xb1c7c73bc776fc93),
+ U64_C (0x4ffcfcd7fcb32be5), U64_C (0x2404041004201408),
+ U64_C (0xe351515951b208a2), U64_C (0x2599995e99bcc72f),
+ U64_C (0x226d6da96d4fc4da), U64_C (0x650d0d340d68391a),
+ U64_C (0x79fafacffa8335e9), U64_C (0x69dfdf5bdfb684a3),
+ U64_C (0xa97e7ee57ed79bfc), U64_C (0x19242490243db448),
+ U64_C (0xfe3b3bec3bc5d776), U64_C (0x9aabab96ab313d4b),
+ U64_C (0xf0cece1fce3ed181), U64_C (0x9911114411885522),
+ U64_C (0x838f8f068f0c8903), U64_C (0x044e4e254e4a6b9c),
+ U64_C (0x66b7b7e6b7d15173), U64_C (0xe0ebeb8beb0b60cb),
+ U64_C (0xc13c3cf03cfdcc78), U64_C (0xfd81813e817cbf1f),
+ U64_C (0x4094946a94d4fe35), U64_C (0x1cf7f7fbf7eb0cf3),
+ U64_C (0x18b9b9deb9a1676f), U64_C (0x8b13134c13985f26),
+ U64_C (0x512c2cb02c7d9c58), U64_C (0x05d3d36bd3d6b8bb),
+ U64_C (0x8ce7e7bbe76b5cd3), U64_C (0x396e6ea56e57cbdc),
+ U64_C (0xaac4c437c46ef395), U64_C (0x1b03030c03180f06),
+ U64_C (0xdc565645568a13ac), U64_C (0x5e44440d441a4988),
+ U64_C (0xa07f7fe17fdf9efe), U64_C (0x88a9a99ea921374f),
+ U64_C (0x672a2aa82a4d8254), U64_C (0x0abbbbd6bbb16d6b),
+ U64_C (0x87c1c123c146e29f), U64_C (0xf153535153a202a6),
+ U64_C (0x72dcdc57dcae8ba5), U64_C (0x530b0b2c0b582716),
+ U64_C (0x019d9d4e9d9cd327), U64_C (0x2b6c6cad6c47c1d8),
+ U64_C (0xa43131c43195f562), U64_C (0xf37474cd7487b9e8),
+ U64_C (0x15f6f6fff6e309f1), U64_C (0x4c464605460a438c),
+ U64_C (0xa5acac8aac092645), U64_C (0xb589891e893c970f),
+ U64_C (0xb414145014a04428), U64_C (0xbae1e1a3e15b42df),
+ U64_C (0xa616165816b04e2c), U64_C (0xf73a3ae83acdd274),
+ U64_C (0x066969b9696fd0d2), U64_C (0x4109092409482d12),
+ U64_C (0xd77070dd70a7ade0), U64_C (0x6fb6b6e2b6d95471),
+ U64_C (0x1ed0d067d0ceb7bd), U64_C (0xd6eded93ed3b7ec7),
+ U64_C (0xe2cccc17cc2edb85), U64_C (0x68424215422a5784),
+ U64_C (0x2c98985a98b4c22d), U64_C (0xeda4a4aaa4490e55),
+ U64_C (0x752828a0285d8850), U64_C (0x865c5c6d5cda31b8),
+ U64_C (0x6bf8f8c7f8933fed), U64_C (0xc28686228644a411),
+ }, {
+ U64_C (0x30d818186018c078), U64_C (0x462623238c2305af),
+ U64_C (0x91b8c6c63fc67ef9), U64_C (0xcdfbe8e887e8136f),
+ U64_C (0x13cb878726874ca1), U64_C (0x6d11b8b8dab8a962),
+ U64_C (0x0209010104010805), U64_C (0x9e0d4f4f214f426e),
+ U64_C (0x6c9b3636d836adee), U64_C (0x51ffa6a6a2a65904),
+ U64_C (0xb90cd2d26fd2debd), U64_C (0xf70ef5f5f3f5fb06),
+ U64_C (0xf2967979f979ef80), U64_C (0xde306f6fa16f5fce),
+ U64_C (0x3f6d91917e91fcef), U64_C (0xa4f852525552aa07),
+ U64_C (0xc04760609d6027fd), U64_C (0x6535bcbccabc8976),
+ U64_C (0x2b379b9b569baccd), U64_C (0x018a8e8e028e048c),
+ U64_C (0x5bd2a3a3b6a37115), U64_C (0x186c0c0c300c603c),
+ U64_C (0xf6847b7bf17bff8a), U64_C (0x6a803535d435b5e1),
+ U64_C (0x3af51d1d741de869), U64_C (0xddb3e0e0a7e05347),
+ U64_C (0xb321d7d77bd7f6ac), U64_C (0x999cc2c22fc25eed),
+ U64_C (0x5c432e2eb82e6d96), U64_C (0x96294b4b314b627a),
+ U64_C (0xe15dfefedffea321), U64_C (0xaed5575741578216),
+ U64_C (0x2abd15155415a841), U64_C (0xeee87777c1779fb6),
+ U64_C (0x6e923737dc37a5eb), U64_C (0xd79ee5e5b3e57b56),
+ U64_C (0x23139f9f469f8cd9), U64_C (0xfd23f0f0e7f0d317),
+ U64_C (0x94204a4a354a6a7f), U64_C (0xa944dada4fda9e95),
+ U64_C (0xb0a258587d58fa25), U64_C (0x8fcfc9c903c906ca),
+ U64_C (0x527c2929a429558d), U64_C (0x145a0a0a280a5022),
+ U64_C (0x7f50b1b1feb1e14f), U64_C (0x5dc9a0a0baa0691a),
+ U64_C (0xd6146b6bb16b7fda), U64_C (0x17d985852e855cab),
+ U64_C (0x673cbdbdcebd8173), U64_C (0xba8f5d5d695dd234),
+ U64_C (0x2090101040108050), U64_C (0xf507f4f4f7f4f303),
+ U64_C (0x8bddcbcb0bcb16c0), U64_C (0x7cd33e3ef83eedc6),
+ U64_C (0x0a2d050514052811), U64_C (0xce78676781671fe6),
+ U64_C (0xd597e4e4b7e47353), U64_C (0x4e0227279c2725bb),
+ U64_C (0x8273414119413258), U64_C (0x0ba78b8b168b2c9d),
+ U64_C (0x53f6a7a7a6a75101), U64_C (0xfab27d7de97dcf94),
+ U64_C (0x374995956e95dcfb), U64_C (0xad56d8d847d88e9f),
+ U64_C (0xeb70fbfbcbfb8b30), U64_C (0xc1cdeeee9fee2371),
+ U64_C (0xf8bb7c7ced7cc791), U64_C (0xcc716666856617e3),
+ U64_C (0xa77bdddd53dda68e), U64_C (0x2eaf17175c17b84b),
+ U64_C (0x8e45474701470246), U64_C (0x211a9e9e429e84dc),
+ U64_C (0x89d4caca0fca1ec5), U64_C (0x5a582d2db42d7599),
+ U64_C (0x632ebfbfc6bf9179), U64_C (0x0e3f07071c07381b),
+ U64_C (0x47acadad8ead0123), U64_C (0xb4b05a5a755aea2f),
+ U64_C (0x1bef838336836cb5), U64_C (0x66b63333cc3385ff),
+ U64_C (0xc65c636391633ff2), U64_C (0x041202020802100a),
+ U64_C (0x4993aaaa92aa3938), U64_C (0xe2de7171d971afa8),
+ U64_C (0x8dc6c8c807c80ecf), U64_C (0x32d119196419c87d),
+ U64_C (0x923b494939497270), U64_C (0xaf5fd9d943d9869a),
+ U64_C (0xf931f2f2eff2c31d), U64_C (0xdba8e3e3abe34b48),
+ U64_C (0xb6b95b5b715be22a), U64_C (0x0dbc88881a883492),
+ U64_C (0x293e9a9a529aa4c8), U64_C (0x4c0b262698262dbe),
+ U64_C (0x64bf3232c8328dfa), U64_C (0x7d59b0b0fab0e94a),
+ U64_C (0xcff2e9e983e91b6a), U64_C (0x1e770f0f3c0f7833),
+ U64_C (0xb733d5d573d5e6a6), U64_C (0x1df480803a8074ba),
+ U64_C (0x6127bebec2be997c), U64_C (0x87ebcdcd13cd26de),
+ U64_C (0x68893434d034bde4), U64_C (0x903248483d487a75),
+ U64_C (0xe354ffffdbffab24), U64_C (0xf48d7a7af57af78f),
+ U64_C (0x3d6490907a90f4ea), U64_C (0xbe9d5f5f615fc23e),
+ U64_C (0x403d202080201da0), U64_C (0xd00f6868bd6867d5),
+ U64_C (0x34ca1a1a681ad072), U64_C (0x41b7aeae82ae192c),
+ U64_C (0x757db4b4eab4c95e), U64_C (0xa8ce54544d549a19),
+ U64_C (0x3b7f93937693ece5), U64_C (0x442f222288220daa),
+ U64_C (0xc86364648d6407e9), U64_C (0xff2af1f1e3f1db12),
+ U64_C (0xe6cc7373d173bfa2), U64_C (0x248212124812905a),
+ U64_C (0x807a40401d403a5d), U64_C (0x1048080820084028),
+ U64_C (0x9b95c3c32bc356e8), U64_C (0xc5dfecec97ec337b),
+ U64_C (0xab4ddbdb4bdb9690), U64_C (0x5fc0a1a1bea1611f),
+ U64_C (0x07918d8d0e8d1c83), U64_C (0x7ac83d3df43df5c9),
+ U64_C (0x335b97976697ccf1), U64_C (0x0000000000000000),
+ U64_C (0x83f9cfcf1bcf36d4), U64_C (0x566e2b2bac2b4587),
+ U64_C (0xece17676c57697b3), U64_C (0x19e68282328264b0),
+ U64_C (0xb128d6d67fd6fea9), U64_C (0x36c31b1b6c1bd877),
+ U64_C (0x7774b5b5eeb5c15b), U64_C (0x43beafaf86af1129),
+ U64_C (0xd41d6a6ab56a77df), U64_C (0xa0ea50505d50ba0d),
+ U64_C (0x8a5745450945124c), U64_C (0xfb38f3f3ebf3cb18),
+ U64_C (0x60ad3030c0309df0), U64_C (0xc3c4efef9bef2b74),
+ U64_C (0x7eda3f3ffc3fe5c3), U64_C (0xaac755554955921c),
+ U64_C (0x59dba2a2b2a27910), U64_C (0xc9e9eaea8fea0365),
+ U64_C (0xca6a656589650fec), U64_C (0x6903babad2bab968),
+ U64_C (0x5e4a2f2fbc2f6593), U64_C (0x9d8ec0c027c04ee7),
+ U64_C (0xa160dede5fdebe81), U64_C (0x38fc1c1c701ce06c),
+ U64_C (0xe746fdfdd3fdbb2e), U64_C (0x9a1f4d4d294d5264),
+ U64_C (0x397692927292e4e0), U64_C (0xeafa7575c9758fbc),
+ U64_C (0x0c3606061806301e), U64_C (0x09ae8a8a128a2498),
+ U64_C (0x794bb2b2f2b2f940), U64_C (0xd185e6e6bfe66359),
+ U64_C (0x1c7e0e0e380e7036), U64_C (0x3ee71f1f7c1ff863),
+ U64_C (0xc4556262956237f7), U64_C (0xb53ad4d477d4eea3),
+ U64_C (0x4d81a8a89aa82932), U64_C (0x315296966296c4f4),
+ U64_C (0xef62f9f9c3f99b3a), U64_C (0x97a3c5c533c566f6),
+ U64_C (0x4a102525942535b1), U64_C (0xb2ab59597959f220),
+ U64_C (0x15d084842a8454ae), U64_C (0xe4c57272d572b7a7),
+ U64_C (0x72ec3939e439d5dd), U64_C (0x98164c4c2d4c5a61),
+ U64_C (0xbc945e5e655eca3b), U64_C (0xf09f7878fd78e785),
+ U64_C (0x70e53838e038ddd8), U64_C (0x05988c8c0a8c1486),
+ U64_C (0xbf17d1d163d1c6b2), U64_C (0x57e4a5a5aea5410b),
+ U64_C (0xd9a1e2e2afe2434d), U64_C (0xc24e616199612ff8),
+ U64_C (0x7b42b3b3f6b3f145), U64_C (0x42342121842115a5),
+ U64_C (0x25089c9c4a9c94d6), U64_C (0x3cee1e1e781ef066),
+ U64_C (0x8661434311432252), U64_C (0x93b1c7c73bc776fc),
+ U64_C (0xe54ffcfcd7fcb32b), U64_C (0x0824040410042014),
+ U64_C (0xa2e351515951b208), U64_C (0x2f2599995e99bcc7),
+ U64_C (0xda226d6da96d4fc4), U64_C (0x1a650d0d340d6839),
+ U64_C (0xe979fafacffa8335), U64_C (0xa369dfdf5bdfb684),
+ U64_C (0xfca97e7ee57ed79b), U64_C (0x4819242490243db4),
+ U64_C (0x76fe3b3bec3bc5d7), U64_C (0x4b9aabab96ab313d),
+ U64_C (0x81f0cece1fce3ed1), U64_C (0x2299111144118855),
+ U64_C (0x03838f8f068f0c89), U64_C (0x9c044e4e254e4a6b),
+ U64_C (0x7366b7b7e6b7d151), U64_C (0xcbe0ebeb8beb0b60),
+ U64_C (0x78c13c3cf03cfdcc), U64_C (0x1ffd81813e817cbf),
+ U64_C (0x354094946a94d4fe), U64_C (0xf31cf7f7fbf7eb0c),
+ U64_C (0x6f18b9b9deb9a167), U64_C (0x268b13134c13985f),
+ U64_C (0x58512c2cb02c7d9c), U64_C (0xbb05d3d36bd3d6b8),
+ U64_C (0xd38ce7e7bbe76b5c), U64_C (0xdc396e6ea56e57cb),
+ U64_C (0x95aac4c437c46ef3), U64_C (0x061b03030c03180f),
+ U64_C (0xacdc565645568a13), U64_C (0x885e44440d441a49),
+ U64_C (0xfea07f7fe17fdf9e), U64_C (0x4f88a9a99ea92137),
+ U64_C (0x54672a2aa82a4d82), U64_C (0x6b0abbbbd6bbb16d),
+ U64_C (0x9f87c1c123c146e2), U64_C (0xa6f153535153a202),
+ U64_C (0xa572dcdc57dcae8b), U64_C (0x16530b0b2c0b5827),
+ U64_C (0x27019d9d4e9d9cd3), U64_C (0xd82b6c6cad6c47c1),
+ U64_C (0x62a43131c43195f5), U64_C (0xe8f37474cd7487b9),
+ U64_C (0xf115f6f6fff6e309), U64_C (0x8c4c464605460a43),
+ U64_C (0x45a5acac8aac0926), U64_C (0x0fb589891e893c97),
+ U64_C (0x28b414145014a044), U64_C (0xdfbae1e1a3e15b42),
+ U64_C (0x2ca616165816b04e), U64_C (0x74f73a3ae83acdd2),
+ U64_C (0xd2066969b9696fd0), U64_C (0x124109092409482d),
+ U64_C (0xe0d77070dd70a7ad), U64_C (0x716fb6b6e2b6d954),
+ U64_C (0xbd1ed0d067d0ceb7), U64_C (0xc7d6eded93ed3b7e),
+ U64_C (0x85e2cccc17cc2edb), U64_C (0x8468424215422a57),
+ U64_C (0x2d2c98985a98b4c2), U64_C (0x55eda4a4aaa4490e),
+ U64_C (0x50752828a0285d88), U64_C (0xb8865c5c6d5cda31),
+ U64_C (0xed6bf8f8c7f8933f), U64_C (0x11c28686228644a4),
+ }, {
+ U64_C (0x7830d818186018c0), U64_C (0xaf462623238c2305),
+ U64_C (0xf991b8c6c63fc67e), U64_C (0x6fcdfbe8e887e813),
+ U64_C (0xa113cb878726874c), U64_C (0x626d11b8b8dab8a9),
+ U64_C (0x0502090101040108), U64_C (0x6e9e0d4f4f214f42),
+ U64_C (0xee6c9b3636d836ad), U64_C (0x0451ffa6a6a2a659),
+ U64_C (0xbdb90cd2d26fd2de), U64_C (0x06f70ef5f5f3f5fb),
+ U64_C (0x80f2967979f979ef), U64_C (0xcede306f6fa16f5f),
+ U64_C (0xef3f6d91917e91fc), U64_C (0x07a4f852525552aa),
+ U64_C (0xfdc04760609d6027), U64_C (0x766535bcbccabc89),
+ U64_C (0xcd2b379b9b569bac), U64_C (0x8c018a8e8e028e04),
+ U64_C (0x155bd2a3a3b6a371), U64_C (0x3c186c0c0c300c60),
+ U64_C (0x8af6847b7bf17bff), U64_C (0xe16a803535d435b5),
+ U64_C (0x693af51d1d741de8), U64_C (0x47ddb3e0e0a7e053),
+ U64_C (0xacb321d7d77bd7f6), U64_C (0xed999cc2c22fc25e),
+ U64_C (0x965c432e2eb82e6d), U64_C (0x7a96294b4b314b62),
+ U64_C (0x21e15dfefedffea3), U64_C (0x16aed55757415782),
+ U64_C (0x412abd15155415a8), U64_C (0xb6eee87777c1779f),
+ U64_C (0xeb6e923737dc37a5), U64_C (0x56d79ee5e5b3e57b),
+ U64_C (0xd923139f9f469f8c), U64_C (0x17fd23f0f0e7f0d3),
+ U64_C (0x7f94204a4a354a6a), U64_C (0x95a944dada4fda9e),
+ U64_C (0x25b0a258587d58fa), U64_C (0xca8fcfc9c903c906),
+ U64_C (0x8d527c2929a42955), U64_C (0x22145a0a0a280a50),
+ U64_C (0x4f7f50b1b1feb1e1), U64_C (0x1a5dc9a0a0baa069),
+ U64_C (0xdad6146b6bb16b7f), U64_C (0xab17d985852e855c),
+ U64_C (0x73673cbdbdcebd81), U64_C (0x34ba8f5d5d695dd2),
+ U64_C (0x5020901010401080), U64_C (0x03f507f4f4f7f4f3),
+ U64_C (0xc08bddcbcb0bcb16), U64_C (0xc67cd33e3ef83eed),
+ U64_C (0x110a2d0505140528), U64_C (0xe6ce78676781671f),
+ U64_C (0x53d597e4e4b7e473), U64_C (0xbb4e0227279c2725),
+ U64_C (0x5882734141194132), U64_C (0x9d0ba78b8b168b2c),
+ U64_C (0x0153f6a7a7a6a751), U64_C (0x94fab27d7de97dcf),
+ U64_C (0xfb374995956e95dc), U64_C (0x9fad56d8d847d88e),
+ U64_C (0x30eb70fbfbcbfb8b), U64_C (0x71c1cdeeee9fee23),
+ U64_C (0x91f8bb7c7ced7cc7), U64_C (0xe3cc716666856617),
+ U64_C (0x8ea77bdddd53dda6), U64_C (0x4b2eaf17175c17b8),
+ U64_C (0x468e454747014702), U64_C (0xdc211a9e9e429e84),
+ U64_C (0xc589d4caca0fca1e), U64_C (0x995a582d2db42d75),
+ U64_C (0x79632ebfbfc6bf91), U64_C (0x1b0e3f07071c0738),
+ U64_C (0x2347acadad8ead01), U64_C (0x2fb4b05a5a755aea),
+ U64_C (0xb51bef838336836c), U64_C (0xff66b63333cc3385),
+ U64_C (0xf2c65c636391633f), U64_C (0x0a04120202080210),
+ U64_C (0x384993aaaa92aa39), U64_C (0xa8e2de7171d971af),
+ U64_C (0xcf8dc6c8c807c80e), U64_C (0x7d32d119196419c8),
+ U64_C (0x70923b4949394972), U64_C (0x9aaf5fd9d943d986),
+ U64_C (0x1df931f2f2eff2c3), U64_C (0x48dba8e3e3abe34b),
+ U64_C (0x2ab6b95b5b715be2), U64_C (0x920dbc88881a8834),
+ U64_C (0xc8293e9a9a529aa4), U64_C (0xbe4c0b262698262d),
+ U64_C (0xfa64bf3232c8328d), U64_C (0x4a7d59b0b0fab0e9),
+ U64_C (0x6acff2e9e983e91b), U64_C (0x331e770f0f3c0f78),
+ U64_C (0xa6b733d5d573d5e6), U64_C (0xba1df480803a8074),
+ U64_C (0x7c6127bebec2be99), U64_C (0xde87ebcdcd13cd26),
+ U64_C (0xe468893434d034bd), U64_C (0x75903248483d487a),
+ U64_C (0x24e354ffffdbffab), U64_C (0x8ff48d7a7af57af7),
+ U64_C (0xea3d6490907a90f4), U64_C (0x3ebe9d5f5f615fc2),
+ U64_C (0xa0403d202080201d), U64_C (0xd5d00f6868bd6867),
+ U64_C (0x7234ca1a1a681ad0), U64_C (0x2c41b7aeae82ae19),
+ U64_C (0x5e757db4b4eab4c9), U64_C (0x19a8ce54544d549a),
+ U64_C (0xe53b7f93937693ec), U64_C (0xaa442f222288220d),
+ U64_C (0xe9c86364648d6407), U64_C (0x12ff2af1f1e3f1db),
+ U64_C (0xa2e6cc7373d173bf), U64_C (0x5a24821212481290),
+ U64_C (0x5d807a40401d403a), U64_C (0x2810480808200840),
+ U64_C (0xe89b95c3c32bc356), U64_C (0x7bc5dfecec97ec33),
+ U64_C (0x90ab4ddbdb4bdb96), U64_C (0x1f5fc0a1a1bea161),
+ U64_C (0x8307918d8d0e8d1c), U64_C (0xc97ac83d3df43df5),
+ U64_C (0xf1335b97976697cc), U64_C (0x0000000000000000),
+ U64_C (0xd483f9cfcf1bcf36), U64_C (0x87566e2b2bac2b45),
+ U64_C (0xb3ece17676c57697), U64_C (0xb019e68282328264),
+ U64_C (0xa9b128d6d67fd6fe), U64_C (0x7736c31b1b6c1bd8),
+ U64_C (0x5b7774b5b5eeb5c1), U64_C (0x2943beafaf86af11),
+ U64_C (0xdfd41d6a6ab56a77), U64_C (0x0da0ea50505d50ba),
+ U64_C (0x4c8a574545094512), U64_C (0x18fb38f3f3ebf3cb),
+ U64_C (0xf060ad3030c0309d), U64_C (0x74c3c4efef9bef2b),
+ U64_C (0xc37eda3f3ffc3fe5), U64_C (0x1caac75555495592),
+ U64_C (0x1059dba2a2b2a279), U64_C (0x65c9e9eaea8fea03),
+ U64_C (0xecca6a656589650f), U64_C (0x686903babad2bab9),
+ U64_C (0x935e4a2f2fbc2f65), U64_C (0xe79d8ec0c027c04e),
+ U64_C (0x81a160dede5fdebe), U64_C (0x6c38fc1c1c701ce0),
+ U64_C (0x2ee746fdfdd3fdbb), U64_C (0x649a1f4d4d294d52),
+ U64_C (0xe0397692927292e4), U64_C (0xbceafa7575c9758f),
+ U64_C (0x1e0c360606180630), U64_C (0x9809ae8a8a128a24),
+ U64_C (0x40794bb2b2f2b2f9), U64_C (0x59d185e6e6bfe663),
+ U64_C (0x361c7e0e0e380e70), U64_C (0x633ee71f1f7c1ff8),
+ U64_C (0xf7c4556262956237), U64_C (0xa3b53ad4d477d4ee),
+ U64_C (0x324d81a8a89aa829), U64_C (0xf4315296966296c4),
+ U64_C (0x3aef62f9f9c3f99b), U64_C (0xf697a3c5c533c566),
+ U64_C (0xb14a102525942535), U64_C (0x20b2ab59597959f2),
+ U64_C (0xae15d084842a8454), U64_C (0xa7e4c57272d572b7),
+ U64_C (0xdd72ec3939e439d5), U64_C (0x6198164c4c2d4c5a),
+ U64_C (0x3bbc945e5e655eca), U64_C (0x85f09f7878fd78e7),
+ U64_C (0xd870e53838e038dd), U64_C (0x8605988c8c0a8c14),
+ U64_C (0xb2bf17d1d163d1c6), U64_C (0x0b57e4a5a5aea541),
+ U64_C (0x4dd9a1e2e2afe243), U64_C (0xf8c24e616199612f),
+ U64_C (0x457b42b3b3f6b3f1), U64_C (0xa542342121842115),
+ U64_C (0xd625089c9c4a9c94), U64_C (0x663cee1e1e781ef0),
+ U64_C (0x5286614343114322), U64_C (0xfc93b1c7c73bc776),
+ U64_C (0x2be54ffcfcd7fcb3), U64_C (0x1408240404100420),
+ U64_C (0x08a2e351515951b2), U64_C (0xc72f2599995e99bc),
+ U64_C (0xc4da226d6da96d4f), U64_C (0x391a650d0d340d68),
+ U64_C (0x35e979fafacffa83), U64_C (0x84a369dfdf5bdfb6),
+ U64_C (0x9bfca97e7ee57ed7), U64_C (0xb44819242490243d),
+ U64_C (0xd776fe3b3bec3bc5), U64_C (0x3d4b9aabab96ab31),
+ U64_C (0xd181f0cece1fce3e), U64_C (0x5522991111441188),
+ U64_C (0x8903838f8f068f0c), U64_C (0x6b9c044e4e254e4a),
+ U64_C (0x517366b7b7e6b7d1), U64_C (0x60cbe0ebeb8beb0b),
+ U64_C (0xcc78c13c3cf03cfd), U64_C (0xbf1ffd81813e817c),
+ U64_C (0xfe354094946a94d4), U64_C (0x0cf31cf7f7fbf7eb),
+ U64_C (0x676f18b9b9deb9a1), U64_C (0x5f268b13134c1398),
+ U64_C (0x9c58512c2cb02c7d), U64_C (0xb8bb05d3d36bd3d6),
+ U64_C (0x5cd38ce7e7bbe76b), U64_C (0xcbdc396e6ea56e57),
+ U64_C (0xf395aac4c437c46e), U64_C (0x0f061b03030c0318),
+ U64_C (0x13acdc565645568a), U64_C (0x49885e44440d441a),
+ U64_C (0x9efea07f7fe17fdf), U64_C (0x374f88a9a99ea921),
+ U64_C (0x8254672a2aa82a4d), U64_C (0x6d6b0abbbbd6bbb1),
+ U64_C (0xe29f87c1c123c146), U64_C (0x02a6f153535153a2),
+ U64_C (0x8ba572dcdc57dcae), U64_C (0x2716530b0b2c0b58),
+ U64_C (0xd327019d9d4e9d9c), U64_C (0xc1d82b6c6cad6c47),
+ U64_C (0xf562a43131c43195), U64_C (0xb9e8f37474cd7487),
+ U64_C (0x09f115f6f6fff6e3), U64_C (0x438c4c464605460a),
+ U64_C (0x2645a5acac8aac09), U64_C (0x970fb589891e893c),
+ U64_C (0x4428b414145014a0), U64_C (0x42dfbae1e1a3e15b),
+ U64_C (0x4e2ca616165816b0), U64_C (0xd274f73a3ae83acd),
+ U64_C (0xd0d2066969b9696f), U64_C (0x2d12410909240948),
+ U64_C (0xade0d77070dd70a7), U64_C (0x54716fb6b6e2b6d9),
+ U64_C (0xb7bd1ed0d067d0ce), U64_C (0x7ec7d6eded93ed3b),
+ U64_C (0xdb85e2cccc17cc2e), U64_C (0x578468424215422a),
+ U64_C (0xc22d2c98985a98b4), U64_C (0x0e55eda4a4aaa449),
+ U64_C (0x8850752828a0285d), U64_C (0x31b8865c5c6d5cda),
+ U64_C (0x3fed6bf8f8c7f893), U64_C (0xa411c28686228644),
+ }, {
+ U64_C (0xc07830d818186018), U64_C (0x05af462623238c23),
+ U64_C (0x7ef991b8c6c63fc6), U64_C (0x136fcdfbe8e887e8),
+ U64_C (0x4ca113cb87872687), U64_C (0xa9626d11b8b8dab8),
+ U64_C (0x0805020901010401), U64_C (0x426e9e0d4f4f214f),
+ U64_C (0xadee6c9b3636d836), U64_C (0x590451ffa6a6a2a6),
+ U64_C (0xdebdb90cd2d26fd2), U64_C (0xfb06f70ef5f5f3f5),
+ U64_C (0xef80f2967979f979), U64_C (0x5fcede306f6fa16f),
+ U64_C (0xfcef3f6d91917e91), U64_C (0xaa07a4f852525552),
+ U64_C (0x27fdc04760609d60), U64_C (0x89766535bcbccabc),
+ U64_C (0xaccd2b379b9b569b), U64_C (0x048c018a8e8e028e),
+ U64_C (0x71155bd2a3a3b6a3), U64_C (0x603c186c0c0c300c),
+ U64_C (0xff8af6847b7bf17b), U64_C (0xb5e16a803535d435),
+ U64_C (0xe8693af51d1d741d), U64_C (0x5347ddb3e0e0a7e0),
+ U64_C (0xf6acb321d7d77bd7), U64_C (0x5eed999cc2c22fc2),
+ U64_C (0x6d965c432e2eb82e), U64_C (0x627a96294b4b314b),
+ U64_C (0xa321e15dfefedffe), U64_C (0x8216aed557574157),
+ U64_C (0xa8412abd15155415), U64_C (0x9fb6eee87777c177),
+ U64_C (0xa5eb6e923737dc37), U64_C (0x7b56d79ee5e5b3e5),
+ U64_C (0x8cd923139f9f469f), U64_C (0xd317fd23f0f0e7f0),
+ U64_C (0x6a7f94204a4a354a), U64_C (0x9e95a944dada4fda),
+ U64_C (0xfa25b0a258587d58), U64_C (0x06ca8fcfc9c903c9),
+ U64_C (0x558d527c2929a429), U64_C (0x5022145a0a0a280a),
+ U64_C (0xe14f7f50b1b1feb1), U64_C (0x691a5dc9a0a0baa0),
+ U64_C (0x7fdad6146b6bb16b), U64_C (0x5cab17d985852e85),
+ U64_C (0x8173673cbdbdcebd), U64_C (0xd234ba8f5d5d695d),
+ U64_C (0x8050209010104010), U64_C (0xf303f507f4f4f7f4),
+ U64_C (0x16c08bddcbcb0bcb), U64_C (0xedc67cd33e3ef83e),
+ U64_C (0x28110a2d05051405), U64_C (0x1fe6ce7867678167),
+ U64_C (0x7353d597e4e4b7e4), U64_C (0x25bb4e0227279c27),
+ U64_C (0x3258827341411941), U64_C (0x2c9d0ba78b8b168b),
+ U64_C (0x510153f6a7a7a6a7), U64_C (0xcf94fab27d7de97d),
+ U64_C (0xdcfb374995956e95), U64_C (0x8e9fad56d8d847d8),
+ U64_C (0x8b30eb70fbfbcbfb), U64_C (0x2371c1cdeeee9fee),
+ U64_C (0xc791f8bb7c7ced7c), U64_C (0x17e3cc7166668566),
+ U64_C (0xa68ea77bdddd53dd), U64_C (0xb84b2eaf17175c17),
+ U64_C (0x02468e4547470147), U64_C (0x84dc211a9e9e429e),
+ U64_C (0x1ec589d4caca0fca), U64_C (0x75995a582d2db42d),
+ U64_C (0x9179632ebfbfc6bf), U64_C (0x381b0e3f07071c07),
+ U64_C (0x012347acadad8ead), U64_C (0xea2fb4b05a5a755a),
+ U64_C (0x6cb51bef83833683), U64_C (0x85ff66b63333cc33),
+ U64_C (0x3ff2c65c63639163), U64_C (0x100a041202020802),
+ U64_C (0x39384993aaaa92aa), U64_C (0xafa8e2de7171d971),
+ U64_C (0x0ecf8dc6c8c807c8), U64_C (0xc87d32d119196419),
+ U64_C (0x7270923b49493949), U64_C (0x869aaf5fd9d943d9),
+ U64_C (0xc31df931f2f2eff2), U64_C (0x4b48dba8e3e3abe3),
+ U64_C (0xe22ab6b95b5b715b), U64_C (0x34920dbc88881a88),
+ U64_C (0xa4c8293e9a9a529a), U64_C (0x2dbe4c0b26269826),
+ U64_C (0x8dfa64bf3232c832), U64_C (0xe94a7d59b0b0fab0),
+ U64_C (0x1b6acff2e9e983e9), U64_C (0x78331e770f0f3c0f),
+ U64_C (0xe6a6b733d5d573d5), U64_C (0x74ba1df480803a80),
+ U64_C (0x997c6127bebec2be), U64_C (0x26de87ebcdcd13cd),
+ U64_C (0xbde468893434d034), U64_C (0x7a75903248483d48),
+ U64_C (0xab24e354ffffdbff), U64_C (0xf78ff48d7a7af57a),
+ U64_C (0xf4ea3d6490907a90), U64_C (0xc23ebe9d5f5f615f),
+ U64_C (0x1da0403d20208020), U64_C (0x67d5d00f6868bd68),
+ U64_C (0xd07234ca1a1a681a), U64_C (0x192c41b7aeae82ae),
+ U64_C (0xc95e757db4b4eab4), U64_C (0x9a19a8ce54544d54),
+ U64_C (0xece53b7f93937693), U64_C (0x0daa442f22228822),
+ U64_C (0x07e9c86364648d64), U64_C (0xdb12ff2af1f1e3f1),
+ U64_C (0xbfa2e6cc7373d173), U64_C (0x905a248212124812),
+ U64_C (0x3a5d807a40401d40), U64_C (0x4028104808082008),
+ U64_C (0x56e89b95c3c32bc3), U64_C (0x337bc5dfecec97ec),
+ U64_C (0x9690ab4ddbdb4bdb), U64_C (0x611f5fc0a1a1bea1),
+ U64_C (0x1c8307918d8d0e8d), U64_C (0xf5c97ac83d3df43d),
+ U64_C (0xccf1335b97976697), U64_C (0x0000000000000000),
+ U64_C (0x36d483f9cfcf1bcf), U64_C (0x4587566e2b2bac2b),
+ U64_C (0x97b3ece17676c576), U64_C (0x64b019e682823282),
+ U64_C (0xfea9b128d6d67fd6), U64_C (0xd87736c31b1b6c1b),
+ U64_C (0xc15b7774b5b5eeb5), U64_C (0x112943beafaf86af),
+ U64_C (0x77dfd41d6a6ab56a), U64_C (0xba0da0ea50505d50),
+ U64_C (0x124c8a5745450945), U64_C (0xcb18fb38f3f3ebf3),
+ U64_C (0x9df060ad3030c030), U64_C (0x2b74c3c4efef9bef),
+ U64_C (0xe5c37eda3f3ffc3f), U64_C (0x921caac755554955),
+ U64_C (0x791059dba2a2b2a2), U64_C (0x0365c9e9eaea8fea),
+ U64_C (0x0fecca6a65658965), U64_C (0xb9686903babad2ba),
+ U64_C (0x65935e4a2f2fbc2f), U64_C (0x4ee79d8ec0c027c0),
+ U64_C (0xbe81a160dede5fde), U64_C (0xe06c38fc1c1c701c),
+ U64_C (0xbb2ee746fdfdd3fd), U64_C (0x52649a1f4d4d294d),
+ U64_C (0xe4e0397692927292), U64_C (0x8fbceafa7575c975),
+ U64_C (0x301e0c3606061806), U64_C (0x249809ae8a8a128a),
+ U64_C (0xf940794bb2b2f2b2), U64_C (0x6359d185e6e6bfe6),
+ U64_C (0x70361c7e0e0e380e), U64_C (0xf8633ee71f1f7c1f),
+ U64_C (0x37f7c45562629562), U64_C (0xeea3b53ad4d477d4),
+ U64_C (0x29324d81a8a89aa8), U64_C (0xc4f4315296966296),
+ U64_C (0x9b3aef62f9f9c3f9), U64_C (0x66f697a3c5c533c5),
+ U64_C (0x35b14a1025259425), U64_C (0xf220b2ab59597959),
+ U64_C (0x54ae15d084842a84), U64_C (0xb7a7e4c57272d572),
+ U64_C (0xd5dd72ec3939e439), U64_C (0x5a6198164c4c2d4c),
+ U64_C (0xca3bbc945e5e655e), U64_C (0xe785f09f7878fd78),
+ U64_C (0xddd870e53838e038), U64_C (0x148605988c8c0a8c),
+ U64_C (0xc6b2bf17d1d163d1), U64_C (0x410b57e4a5a5aea5),
+ U64_C (0x434dd9a1e2e2afe2), U64_C (0x2ff8c24e61619961),
+ U64_C (0xf1457b42b3b3f6b3), U64_C (0x15a5423421218421),
+ U64_C (0x94d625089c9c4a9c), U64_C (0xf0663cee1e1e781e),
+ U64_C (0x2252866143431143), U64_C (0x76fc93b1c7c73bc7),
+ U64_C (0xb32be54ffcfcd7fc), U64_C (0x2014082404041004),
+ U64_C (0xb208a2e351515951), U64_C (0xbcc72f2599995e99),
+ U64_C (0x4fc4da226d6da96d), U64_C (0x68391a650d0d340d),
+ U64_C (0x8335e979fafacffa), U64_C (0xb684a369dfdf5bdf),
+ U64_C (0xd79bfca97e7ee57e), U64_C (0x3db4481924249024),
+ U64_C (0xc5d776fe3b3bec3b), U64_C (0x313d4b9aabab96ab),
+ U64_C (0x3ed181f0cece1fce), U64_C (0x8855229911114411),
+ U64_C (0x0c8903838f8f068f), U64_C (0x4a6b9c044e4e254e),
+ U64_C (0xd1517366b7b7e6b7), U64_C (0x0b60cbe0ebeb8beb),
+ U64_C (0xfdcc78c13c3cf03c), U64_C (0x7cbf1ffd81813e81),
+ U64_C (0xd4fe354094946a94), U64_C (0xeb0cf31cf7f7fbf7),
+ U64_C (0xa1676f18b9b9deb9), U64_C (0x985f268b13134c13),
+ U64_C (0x7d9c58512c2cb02c), U64_C (0xd6b8bb05d3d36bd3),
+ U64_C (0x6b5cd38ce7e7bbe7), U64_C (0x57cbdc396e6ea56e),
+ U64_C (0x6ef395aac4c437c4), U64_C (0x180f061b03030c03),
+ U64_C (0x8a13acdc56564556), U64_C (0x1a49885e44440d44),
+ U64_C (0xdf9efea07f7fe17f), U64_C (0x21374f88a9a99ea9),
+ U64_C (0x4d8254672a2aa82a), U64_C (0xb16d6b0abbbbd6bb),
+ U64_C (0x46e29f87c1c123c1), U64_C (0xa202a6f153535153),
+ U64_C (0xae8ba572dcdc57dc), U64_C (0x582716530b0b2c0b),
+ U64_C (0x9cd327019d9d4e9d), U64_C (0x47c1d82b6c6cad6c),
+ U64_C (0x95f562a43131c431), U64_C (0x87b9e8f37474cd74),
+ U64_C (0xe309f115f6f6fff6), U64_C (0x0a438c4c46460546),
+ U64_C (0x092645a5acac8aac), U64_C (0x3c970fb589891e89),
+ U64_C (0xa04428b414145014), U64_C (0x5b42dfbae1e1a3e1),
+ U64_C (0xb04e2ca616165816), U64_C (0xcdd274f73a3ae83a),
+ U64_C (0x6fd0d2066969b969), U64_C (0x482d124109092409),
+ U64_C (0xa7ade0d77070dd70), U64_C (0xd954716fb6b6e2b6),
+ U64_C (0xceb7bd1ed0d067d0), U64_C (0x3b7ec7d6eded93ed),
+ U64_C (0x2edb85e2cccc17cc), U64_C (0x2a57846842421542),
+ U64_C (0xb4c22d2c98985a98), U64_C (0x490e55eda4a4aaa4),
+ U64_C (0x5d8850752828a028), U64_C (0xda31b8865c5c6d5c),
+ U64_C (0x933fed6bf8f8c7f8), U64_C (0x44a411c286862286),
+ }, {
+ U64_C (0x18c07830d8181860), U64_C (0x2305af462623238c),
+ U64_C (0xc67ef991b8c6c63f), U64_C (0xe8136fcdfbe8e887),
+ U64_C (0x874ca113cb878726), U64_C (0xb8a9626d11b8b8da),
+ U64_C (0x0108050209010104), U64_C (0x4f426e9e0d4f4f21),
+ U64_C (0x36adee6c9b3636d8), U64_C (0xa6590451ffa6a6a2),
+ U64_C (0xd2debdb90cd2d26f), U64_C (0xf5fb06f70ef5f5f3),
+ U64_C (0x79ef80f2967979f9), U64_C (0x6f5fcede306f6fa1),
+ U64_C (0x91fcef3f6d91917e), U64_C (0x52aa07a4f8525255),
+ U64_C (0x6027fdc04760609d), U64_C (0xbc89766535bcbcca),
+ U64_C (0x9baccd2b379b9b56), U64_C (0x8e048c018a8e8e02),
+ U64_C (0xa371155bd2a3a3b6), U64_C (0x0c603c186c0c0c30),
+ U64_C (0x7bff8af6847b7bf1), U64_C (0x35b5e16a803535d4),
+ U64_C (0x1de8693af51d1d74), U64_C (0xe05347ddb3e0e0a7),
+ U64_C (0xd7f6acb321d7d77b), U64_C (0xc25eed999cc2c22f),
+ U64_C (0x2e6d965c432e2eb8), U64_C (0x4b627a96294b4b31),
+ U64_C (0xfea321e15dfefedf), U64_C (0x578216aed5575741),
+ U64_C (0x15a8412abd151554), U64_C (0x779fb6eee87777c1),
+ U64_C (0x37a5eb6e923737dc), U64_C (0xe57b56d79ee5e5b3),
+ U64_C (0x9f8cd923139f9f46), U64_C (0xf0d317fd23f0f0e7),
+ U64_C (0x4a6a7f94204a4a35), U64_C (0xda9e95a944dada4f),
+ U64_C (0x58fa25b0a258587d), U64_C (0xc906ca8fcfc9c903),
+ U64_C (0x29558d527c2929a4), U64_C (0x0a5022145a0a0a28),
+ U64_C (0xb1e14f7f50b1b1fe), U64_C (0xa0691a5dc9a0a0ba),
+ U64_C (0x6b7fdad6146b6bb1), U64_C (0x855cab17d985852e),
+ U64_C (0xbd8173673cbdbdce), U64_C (0x5dd234ba8f5d5d69),
+ U64_C (0x1080502090101040), U64_C (0xf4f303f507f4f4f7),
+ U64_C (0xcb16c08bddcbcb0b), U64_C (0x3eedc67cd33e3ef8),
+ U64_C (0x0528110a2d050514), U64_C (0x671fe6ce78676781),
+ U64_C (0xe47353d597e4e4b7), U64_C (0x2725bb4e0227279c),
+ U64_C (0x4132588273414119), U64_C (0x8b2c9d0ba78b8b16),
+ U64_C (0xa7510153f6a7a7a6), U64_C (0x7dcf94fab27d7de9),
+ U64_C (0x95dcfb374995956e), U64_C (0xd88e9fad56d8d847),
+ U64_C (0xfb8b30eb70fbfbcb), U64_C (0xee2371c1cdeeee9f),
+ U64_C (0x7cc791f8bb7c7ced), U64_C (0x6617e3cc71666685),
+ U64_C (0xdda68ea77bdddd53), U64_C (0x17b84b2eaf17175c),
+ U64_C (0x4702468e45474701), U64_C (0x9e84dc211a9e9e42),
+ U64_C (0xca1ec589d4caca0f), U64_C (0x2d75995a582d2db4),
+ U64_C (0xbf9179632ebfbfc6), U64_C (0x07381b0e3f07071c),
+ U64_C (0xad012347acadad8e), U64_C (0x5aea2fb4b05a5a75),
+ U64_C (0x836cb51bef838336), U64_C (0x3385ff66b63333cc),
+ U64_C (0x633ff2c65c636391), U64_C (0x02100a0412020208),
+ U64_C (0xaa39384993aaaa92), U64_C (0x71afa8e2de7171d9),
+ U64_C (0xc80ecf8dc6c8c807), U64_C (0x19c87d32d1191964),
+ U64_C (0x497270923b494939), U64_C (0xd9869aaf5fd9d943),
+ U64_C (0xf2c31df931f2f2ef), U64_C (0xe34b48dba8e3e3ab),
+ U64_C (0x5be22ab6b95b5b71), U64_C (0x8834920dbc88881a),
+ U64_C (0x9aa4c8293e9a9a52), U64_C (0x262dbe4c0b262698),
+ U64_C (0x328dfa64bf3232c8), U64_C (0xb0e94a7d59b0b0fa),
+ U64_C (0xe91b6acff2e9e983), U64_C (0x0f78331e770f0f3c),
+ U64_C (0xd5e6a6b733d5d573), U64_C (0x8074ba1df480803a),
+ U64_C (0xbe997c6127bebec2), U64_C (0xcd26de87ebcdcd13),
+ U64_C (0x34bde468893434d0), U64_C (0x487a75903248483d),
+ U64_C (0xffab24e354ffffdb), U64_C (0x7af78ff48d7a7af5),
+ U64_C (0x90f4ea3d6490907a), U64_C (0x5fc23ebe9d5f5f61),
+ U64_C (0x201da0403d202080), U64_C (0x6867d5d00f6868bd),
+ U64_C (0x1ad07234ca1a1a68), U64_C (0xae192c41b7aeae82),
+ U64_C (0xb4c95e757db4b4ea), U64_C (0x549a19a8ce54544d),
+ U64_C (0x93ece53b7f939376), U64_C (0x220daa442f222288),
+ U64_C (0x6407e9c86364648d), U64_C (0xf1db12ff2af1f1e3),
+ U64_C (0x73bfa2e6cc7373d1), U64_C (0x12905a2482121248),
+ U64_C (0x403a5d807a40401d), U64_C (0x0840281048080820),
+ U64_C (0xc356e89b95c3c32b), U64_C (0xec337bc5dfecec97),
+ U64_C (0xdb9690ab4ddbdb4b), U64_C (0xa1611f5fc0a1a1be),
+ U64_C (0x8d1c8307918d8d0e), U64_C (0x3df5c97ac83d3df4),
+ U64_C (0x97ccf1335b979766), U64_C (0x0000000000000000),
+ U64_C (0xcf36d483f9cfcf1b), U64_C (0x2b4587566e2b2bac),
+ U64_C (0x7697b3ece17676c5), U64_C (0x8264b019e6828232),
+ U64_C (0xd6fea9b128d6d67f), U64_C (0x1bd87736c31b1b6c),
+ U64_C (0xb5c15b7774b5b5ee), U64_C (0xaf112943beafaf86),
+ U64_C (0x6a77dfd41d6a6ab5), U64_C (0x50ba0da0ea50505d),
+ U64_C (0x45124c8a57454509), U64_C (0xf3cb18fb38f3f3eb),
+ U64_C (0x309df060ad3030c0), U64_C (0xef2b74c3c4efef9b),
+ U64_C (0x3fe5c37eda3f3ffc), U64_C (0x55921caac7555549),
+ U64_C (0xa2791059dba2a2b2), U64_C (0xea0365c9e9eaea8f),
+ U64_C (0x650fecca6a656589), U64_C (0xbab9686903babad2),
+ U64_C (0x2f65935e4a2f2fbc), U64_C (0xc04ee79d8ec0c027),
+ U64_C (0xdebe81a160dede5f), U64_C (0x1ce06c38fc1c1c70),
+ U64_C (0xfdbb2ee746fdfdd3), U64_C (0x4d52649a1f4d4d29),
+ U64_C (0x92e4e03976929272), U64_C (0x758fbceafa7575c9),
+ U64_C (0x06301e0c36060618), U64_C (0x8a249809ae8a8a12),
+ U64_C (0xb2f940794bb2b2f2), U64_C (0xe66359d185e6e6bf),
+ U64_C (0x0e70361c7e0e0e38), U64_C (0x1ff8633ee71f1f7c),
+ U64_C (0x6237f7c455626295), U64_C (0xd4eea3b53ad4d477),
+ U64_C (0xa829324d81a8a89a), U64_C (0x96c4f43152969662),
+ U64_C (0xf99b3aef62f9f9c3), U64_C (0xc566f697a3c5c533),
+ U64_C (0x2535b14a10252594), U64_C (0x59f220b2ab595979),
+ U64_C (0x8454ae15d084842a), U64_C (0x72b7a7e4c57272d5),
+ U64_C (0x39d5dd72ec3939e4), U64_C (0x4c5a6198164c4c2d),
+ U64_C (0x5eca3bbc945e5e65), U64_C (0x78e785f09f7878fd),
+ U64_C (0x38ddd870e53838e0), U64_C (0x8c148605988c8c0a),
+ U64_C (0xd1c6b2bf17d1d163), U64_C (0xa5410b57e4a5a5ae),
+ U64_C (0xe2434dd9a1e2e2af), U64_C (0x612ff8c24e616199),
+ U64_C (0xb3f1457b42b3b3f6), U64_C (0x2115a54234212184),
+ U64_C (0x9c94d625089c9c4a), U64_C (0x1ef0663cee1e1e78),
+ U64_C (0x4322528661434311), U64_C (0xc776fc93b1c7c73b),
+ U64_C (0xfcb32be54ffcfcd7), U64_C (0x0420140824040410),
+ U64_C (0x51b208a2e3515159), U64_C (0x99bcc72f2599995e),
+ U64_C (0x6d4fc4da226d6da9), U64_C (0x0d68391a650d0d34),
+ U64_C (0xfa8335e979fafacf), U64_C (0xdfb684a369dfdf5b),
+ U64_C (0x7ed79bfca97e7ee5), U64_C (0x243db44819242490),
+ U64_C (0x3bc5d776fe3b3bec), U64_C (0xab313d4b9aabab96),
+ U64_C (0xce3ed181f0cece1f), U64_C (0x1188552299111144),
+ U64_C (0x8f0c8903838f8f06), U64_C (0x4e4a6b9c044e4e25),
+ U64_C (0xb7d1517366b7b7e6), U64_C (0xeb0b60cbe0ebeb8b),
+ U64_C (0x3cfdcc78c13c3cf0), U64_C (0x817cbf1ffd81813e),
+ U64_C (0x94d4fe354094946a), U64_C (0xf7eb0cf31cf7f7fb),
+ U64_C (0xb9a1676f18b9b9de), U64_C (0x13985f268b13134c),
+ U64_C (0x2c7d9c58512c2cb0), U64_C (0xd3d6b8bb05d3d36b),
+ U64_C (0xe76b5cd38ce7e7bb), U64_C (0x6e57cbdc396e6ea5),
+ U64_C (0xc46ef395aac4c437), U64_C (0x03180f061b03030c),
+ U64_C (0x568a13acdc565645), U64_C (0x441a49885e44440d),
+ U64_C (0x7fdf9efea07f7fe1), U64_C (0xa921374f88a9a99e),
+ U64_C (0x2a4d8254672a2aa8), U64_C (0xbbb16d6b0abbbbd6),
+ U64_C (0xc146e29f87c1c123), U64_C (0x53a202a6f1535351),
+ U64_C (0xdcae8ba572dcdc57), U64_C (0x0b582716530b0b2c),
+ U64_C (0x9d9cd327019d9d4e), U64_C (0x6c47c1d82b6c6cad),
+ U64_C (0x3195f562a43131c4), U64_C (0x7487b9e8f37474cd),
+ U64_C (0xf6e309f115f6f6ff), U64_C (0x460a438c4c464605),
+ U64_C (0xac092645a5acac8a), U64_C (0x893c970fb589891e),
+ U64_C (0x14a04428b4141450), U64_C (0xe15b42dfbae1e1a3),
+ U64_C (0x16b04e2ca6161658), U64_C (0x3acdd274f73a3ae8),
+ U64_C (0x696fd0d2066969b9), U64_C (0x09482d1241090924),
+ U64_C (0x70a7ade0d77070dd), U64_C (0xb6d954716fb6b6e2),
+ U64_C (0xd0ceb7bd1ed0d067), U64_C (0xed3b7ec7d6eded93),
+ U64_C (0xcc2edb85e2cccc17), U64_C (0x422a578468424215),
+ U64_C (0x98b4c22d2c98985a), U64_C (0xa4490e55eda4a4aa),
+ U64_C (0x285d8850752828a0), U64_C (0x5cda31b8865c5c6d),
+ U64_C (0xf8933fed6bf8f8c7), U64_C (0x8644a411c2868622),
+ }, {
+ U64_C (0x6018c07830d81818), U64_C (0x8c2305af46262323),
+ U64_C (0x3fc67ef991b8c6c6), U64_C (0x87e8136fcdfbe8e8),
+ U64_C (0x26874ca113cb8787), U64_C (0xdab8a9626d11b8b8),
+ U64_C (0x0401080502090101), U64_C (0x214f426e9e0d4f4f),
+ U64_C (0xd836adee6c9b3636), U64_C (0xa2a6590451ffa6a6),
+ U64_C (0x6fd2debdb90cd2d2), U64_C (0xf3f5fb06f70ef5f5),
+ U64_C (0xf979ef80f2967979), U64_C (0xa16f5fcede306f6f),
+ U64_C (0x7e91fcef3f6d9191), U64_C (0x5552aa07a4f85252),
+ U64_C (0x9d6027fdc0476060), U64_C (0xcabc89766535bcbc),
+ U64_C (0x569baccd2b379b9b), U64_C (0x028e048c018a8e8e),
+ U64_C (0xb6a371155bd2a3a3), U64_C (0x300c603c186c0c0c),
+ U64_C (0xf17bff8af6847b7b), U64_C (0xd435b5e16a803535),
+ U64_C (0x741de8693af51d1d), U64_C (0xa7e05347ddb3e0e0),
+ U64_C (0x7bd7f6acb321d7d7), U64_C (0x2fc25eed999cc2c2),
+ U64_C (0xb82e6d965c432e2e), U64_C (0x314b627a96294b4b),
+ U64_C (0xdffea321e15dfefe), U64_C (0x41578216aed55757),
+ U64_C (0x5415a8412abd1515), U64_C (0xc1779fb6eee87777),
+ U64_C (0xdc37a5eb6e923737), U64_C (0xb3e57b56d79ee5e5),
+ U64_C (0x469f8cd923139f9f), U64_C (0xe7f0d317fd23f0f0),
+ U64_C (0x354a6a7f94204a4a), U64_C (0x4fda9e95a944dada),
+ U64_C (0x7d58fa25b0a25858), U64_C (0x03c906ca8fcfc9c9),
+ U64_C (0xa429558d527c2929), U64_C (0x280a5022145a0a0a),
+ U64_C (0xfeb1e14f7f50b1b1), U64_C (0xbaa0691a5dc9a0a0),
+ U64_C (0xb16b7fdad6146b6b), U64_C (0x2e855cab17d98585),
+ U64_C (0xcebd8173673cbdbd), U64_C (0x695dd234ba8f5d5d),
+ U64_C (0x4010805020901010), U64_C (0xf7f4f303f507f4f4),
+ U64_C (0x0bcb16c08bddcbcb), U64_C (0xf83eedc67cd33e3e),
+ U64_C (0x140528110a2d0505), U64_C (0x81671fe6ce786767),
+ U64_C (0xb7e47353d597e4e4), U64_C (0x9c2725bb4e022727),
+ U64_C (0x1941325882734141), U64_C (0x168b2c9d0ba78b8b),
+ U64_C (0xa6a7510153f6a7a7), U64_C (0xe97dcf94fab27d7d),
+ U64_C (0x6e95dcfb37499595), U64_C (0x47d88e9fad56d8d8),
+ U64_C (0xcbfb8b30eb70fbfb), U64_C (0x9fee2371c1cdeeee),
+ U64_C (0xed7cc791f8bb7c7c), U64_C (0x856617e3cc716666),
+ U64_C (0x53dda68ea77bdddd), U64_C (0x5c17b84b2eaf1717),
+ U64_C (0x014702468e454747), U64_C (0x429e84dc211a9e9e),
+ U64_C (0x0fca1ec589d4caca), U64_C (0xb42d75995a582d2d),
+ U64_C (0xc6bf9179632ebfbf), U64_C (0x1c07381b0e3f0707),
+ U64_C (0x8ead012347acadad), U64_C (0x755aea2fb4b05a5a),
+ U64_C (0x36836cb51bef8383), U64_C (0xcc3385ff66b63333),
+ U64_C (0x91633ff2c65c6363), U64_C (0x0802100a04120202),
+ U64_C (0x92aa39384993aaaa), U64_C (0xd971afa8e2de7171),
+ U64_C (0x07c80ecf8dc6c8c8), U64_C (0x6419c87d32d11919),
+ U64_C (0x39497270923b4949), U64_C (0x43d9869aaf5fd9d9),
+ U64_C (0xeff2c31df931f2f2), U64_C (0xabe34b48dba8e3e3),
+ U64_C (0x715be22ab6b95b5b), U64_C (0x1a8834920dbc8888),
+ U64_C (0x529aa4c8293e9a9a), U64_C (0x98262dbe4c0b2626),
+ U64_C (0xc8328dfa64bf3232), U64_C (0xfab0e94a7d59b0b0),
+ U64_C (0x83e91b6acff2e9e9), U64_C (0x3c0f78331e770f0f),
+ U64_C (0x73d5e6a6b733d5d5), U64_C (0x3a8074ba1df48080),
+ U64_C (0xc2be997c6127bebe), U64_C (0x13cd26de87ebcdcd),
+ U64_C (0xd034bde468893434), U64_C (0x3d487a7590324848),
+ U64_C (0xdbffab24e354ffff), U64_C (0xf57af78ff48d7a7a),
+ U64_C (0x7a90f4ea3d649090), U64_C (0x615fc23ebe9d5f5f),
+ U64_C (0x80201da0403d2020), U64_C (0xbd6867d5d00f6868),
+ U64_C (0x681ad07234ca1a1a), U64_C (0x82ae192c41b7aeae),
+ U64_C (0xeab4c95e757db4b4), U64_C (0x4d549a19a8ce5454),
+ U64_C (0x7693ece53b7f9393), U64_C (0x88220daa442f2222),
+ U64_C (0x8d6407e9c8636464), U64_C (0xe3f1db12ff2af1f1),
+ U64_C (0xd173bfa2e6cc7373), U64_C (0x4812905a24821212),
+ U64_C (0x1d403a5d807a4040), U64_C (0x2008402810480808),
+ U64_C (0x2bc356e89b95c3c3), U64_C (0x97ec337bc5dfecec),
+ U64_C (0x4bdb9690ab4ddbdb), U64_C (0xbea1611f5fc0a1a1),
+ U64_C (0x0e8d1c8307918d8d), U64_C (0xf43df5c97ac83d3d),
+ U64_C (0x6697ccf1335b9797), U64_C (0x0000000000000000),
+ U64_C (0x1bcf36d483f9cfcf), U64_C (0xac2b4587566e2b2b),
+ U64_C (0xc57697b3ece17676), U64_C (0x328264b019e68282),
+ U64_C (0x7fd6fea9b128d6d6), U64_C (0x6c1bd87736c31b1b),
+ U64_C (0xeeb5c15b7774b5b5), U64_C (0x86af112943beafaf),
+ U64_C (0xb56a77dfd41d6a6a), U64_C (0x5d50ba0da0ea5050),
+ U64_C (0x0945124c8a574545), U64_C (0xebf3cb18fb38f3f3),
+ U64_C (0xc0309df060ad3030), U64_C (0x9bef2b74c3c4efef),
+ U64_C (0xfc3fe5c37eda3f3f), U64_C (0x4955921caac75555),
+ U64_C (0xb2a2791059dba2a2), U64_C (0x8fea0365c9e9eaea),
+ U64_C (0x89650fecca6a6565), U64_C (0xd2bab9686903baba),
+ U64_C (0xbc2f65935e4a2f2f), U64_C (0x27c04ee79d8ec0c0),
+ U64_C (0x5fdebe81a160dede), U64_C (0x701ce06c38fc1c1c),
+ U64_C (0xd3fdbb2ee746fdfd), U64_C (0x294d52649a1f4d4d),
+ U64_C (0x7292e4e039769292), U64_C (0xc9758fbceafa7575),
+ U64_C (0x1806301e0c360606), U64_C (0x128a249809ae8a8a),
+ U64_C (0xf2b2f940794bb2b2), U64_C (0xbfe66359d185e6e6),
+ U64_C (0x380e70361c7e0e0e), U64_C (0x7c1ff8633ee71f1f),
+ U64_C (0x956237f7c4556262), U64_C (0x77d4eea3b53ad4d4),
+ U64_C (0x9aa829324d81a8a8), U64_C (0x6296c4f431529696),
+ U64_C (0xc3f99b3aef62f9f9), U64_C (0x33c566f697a3c5c5),
+ U64_C (0x942535b14a102525), U64_C (0x7959f220b2ab5959),
+ U64_C (0x2a8454ae15d08484), U64_C (0xd572b7a7e4c57272),
+ U64_C (0xe439d5dd72ec3939), U64_C (0x2d4c5a6198164c4c),
+ U64_C (0x655eca3bbc945e5e), U64_C (0xfd78e785f09f7878),
+ U64_C (0xe038ddd870e53838), U64_C (0x0a8c148605988c8c),
+ U64_C (0x63d1c6b2bf17d1d1), U64_C (0xaea5410b57e4a5a5),
+ U64_C (0xafe2434dd9a1e2e2), U64_C (0x99612ff8c24e6161),
+ U64_C (0xf6b3f1457b42b3b3), U64_C (0x842115a542342121),
+ U64_C (0x4a9c94d625089c9c), U64_C (0x781ef0663cee1e1e),
+ U64_C (0x1143225286614343), U64_C (0x3bc776fc93b1c7c7),
+ U64_C (0xd7fcb32be54ffcfc), U64_C (0x1004201408240404),
+ U64_C (0x5951b208a2e35151), U64_C (0x5e99bcc72f259999),
+ U64_C (0xa96d4fc4da226d6d), U64_C (0x340d68391a650d0d),
+ U64_C (0xcffa8335e979fafa), U64_C (0x5bdfb684a369dfdf),
+ U64_C (0xe57ed79bfca97e7e), U64_C (0x90243db448192424),
+ U64_C (0xec3bc5d776fe3b3b), U64_C (0x96ab313d4b9aabab),
+ U64_C (0x1fce3ed181f0cece), U64_C (0x4411885522991111),
+ U64_C (0x068f0c8903838f8f), U64_C (0x254e4a6b9c044e4e),
+ U64_C (0xe6b7d1517366b7b7), U64_C (0x8beb0b60cbe0ebeb),
+ U64_C (0xf03cfdcc78c13c3c), U64_C (0x3e817cbf1ffd8181),
+ U64_C (0x6a94d4fe35409494), U64_C (0xfbf7eb0cf31cf7f7),
+ U64_C (0xdeb9a1676f18b9b9), U64_C (0x4c13985f268b1313),
+ U64_C (0xb02c7d9c58512c2c), U64_C (0x6bd3d6b8bb05d3d3),
+ U64_C (0xbbe76b5cd38ce7e7), U64_C (0xa56e57cbdc396e6e),
+ U64_C (0x37c46ef395aac4c4), U64_C (0x0c03180f061b0303),
+ U64_C (0x45568a13acdc5656), U64_C (0x0d441a49885e4444),
+ U64_C (0xe17fdf9efea07f7f), U64_C (0x9ea921374f88a9a9),
+ U64_C (0xa82a4d8254672a2a), U64_C (0xd6bbb16d6b0abbbb),
+ U64_C (0x23c146e29f87c1c1), U64_C (0x5153a202a6f15353),
+ U64_C (0x57dcae8ba572dcdc), U64_C (0x2c0b582716530b0b),
+ U64_C (0x4e9d9cd327019d9d), U64_C (0xad6c47c1d82b6c6c),
+ U64_C (0xc43195f562a43131), U64_C (0xcd7487b9e8f37474),
+ U64_C (0xfff6e309f115f6f6), U64_C (0x05460a438c4c4646),
+ U64_C (0x8aac092645a5acac), U64_C (0x1e893c970fb58989),
+ U64_C (0x5014a04428b41414), U64_C (0xa3e15b42dfbae1e1),
+ U64_C (0x5816b04e2ca61616), U64_C (0xe83acdd274f73a3a),
+ U64_C (0xb9696fd0d2066969), U64_C (0x2409482d12410909),
+ U64_C (0xdd70a7ade0d77070), U64_C (0xe2b6d954716fb6b6),
+ U64_C (0x67d0ceb7bd1ed0d0), U64_C (0x93ed3b7ec7d6eded),
+ U64_C (0x17cc2edb85e2cccc), U64_C (0x15422a5784684242),
+ U64_C (0x5a98b4c22d2c9898), U64_C (0xaaa4490e55eda4a4),
+ U64_C (0xa0285d8850752828), U64_C (0x6d5cda31b8865c5c),
+ U64_C (0xc7f8933fed6bf8f8), U64_C (0x228644a411c28686),
+ }, {
+ U64_C (0x186018c07830d818), U64_C (0x238c2305af462623),
+ U64_C (0xc63fc67ef991b8c6), U64_C (0xe887e8136fcdfbe8),
+ U64_C (0x8726874ca113cb87), U64_C (0xb8dab8a9626d11b8),
+ U64_C (0x0104010805020901), U64_C (0x4f214f426e9e0d4f),
+ U64_C (0x36d836adee6c9b36), U64_C (0xa6a2a6590451ffa6),
+ U64_C (0xd26fd2debdb90cd2), U64_C (0xf5f3f5fb06f70ef5),
+ U64_C (0x79f979ef80f29679), U64_C (0x6fa16f5fcede306f),
+ U64_C (0x917e91fcef3f6d91), U64_C (0x525552aa07a4f852),
+ U64_C (0x609d6027fdc04760), U64_C (0xbccabc89766535bc),
+ U64_C (0x9b569baccd2b379b), U64_C (0x8e028e048c018a8e),
+ U64_C (0xa3b6a371155bd2a3), U64_C (0x0c300c603c186c0c),
+ U64_C (0x7bf17bff8af6847b), U64_C (0x35d435b5e16a8035),
+ U64_C (0x1d741de8693af51d), U64_C (0xe0a7e05347ddb3e0),
+ U64_C (0xd77bd7f6acb321d7), U64_C (0xc22fc25eed999cc2),
+ U64_C (0x2eb82e6d965c432e), U64_C (0x4b314b627a96294b),
+ U64_C (0xfedffea321e15dfe), U64_C (0x5741578216aed557),
+ U64_C (0x155415a8412abd15), U64_C (0x77c1779fb6eee877),
+ U64_C (0x37dc37a5eb6e9237), U64_C (0xe5b3e57b56d79ee5),
+ U64_C (0x9f469f8cd923139f), U64_C (0xf0e7f0d317fd23f0),
+ U64_C (0x4a354a6a7f94204a), U64_C (0xda4fda9e95a944da),
+ U64_C (0x587d58fa25b0a258), U64_C (0xc903c906ca8fcfc9),
+ U64_C (0x29a429558d527c29), U64_C (0x0a280a5022145a0a),
+ U64_C (0xb1feb1e14f7f50b1), U64_C (0xa0baa0691a5dc9a0),
+ U64_C (0x6bb16b7fdad6146b), U64_C (0x852e855cab17d985),
+ U64_C (0xbdcebd8173673cbd), U64_C (0x5d695dd234ba8f5d),
+ U64_C (0x1040108050209010), U64_C (0xf4f7f4f303f507f4),
+ U64_C (0xcb0bcb16c08bddcb), U64_C (0x3ef83eedc67cd33e),
+ U64_C (0x05140528110a2d05), U64_C (0x6781671fe6ce7867),
+ U64_C (0xe4b7e47353d597e4), U64_C (0x279c2725bb4e0227),
+ U64_C (0x4119413258827341), U64_C (0x8b168b2c9d0ba78b),
+ U64_C (0xa7a6a7510153f6a7), U64_C (0x7de97dcf94fab27d),
+ U64_C (0x956e95dcfb374995), U64_C (0xd847d88e9fad56d8),
+ U64_C (0xfbcbfb8b30eb70fb), U64_C (0xee9fee2371c1cdee),
+ U64_C (0x7ced7cc791f8bb7c), U64_C (0x66856617e3cc7166),
+ U64_C (0xdd53dda68ea77bdd), U64_C (0x175c17b84b2eaf17),
+ U64_C (0x47014702468e4547), U64_C (0x9e429e84dc211a9e),
+ U64_C (0xca0fca1ec589d4ca), U64_C (0x2db42d75995a582d),
+ U64_C (0xbfc6bf9179632ebf), U64_C (0x071c07381b0e3f07),
+ U64_C (0xad8ead012347acad), U64_C (0x5a755aea2fb4b05a),
+ U64_C (0x8336836cb51bef83), U64_C (0x33cc3385ff66b633),
+ U64_C (0x6391633ff2c65c63), U64_C (0x020802100a041202),
+ U64_C (0xaa92aa39384993aa), U64_C (0x71d971afa8e2de71),
+ U64_C (0xc807c80ecf8dc6c8), U64_C (0x196419c87d32d119),
+ U64_C (0x4939497270923b49), U64_C (0xd943d9869aaf5fd9),
+ U64_C (0xf2eff2c31df931f2), U64_C (0xe3abe34b48dba8e3),
+ U64_C (0x5b715be22ab6b95b), U64_C (0x881a8834920dbc88),
+ U64_C (0x9a529aa4c8293e9a), U64_C (0x2698262dbe4c0b26),
+ U64_C (0x32c8328dfa64bf32), U64_C (0xb0fab0e94a7d59b0),
+ U64_C (0xe983e91b6acff2e9), U64_C (0x0f3c0f78331e770f),
+ U64_C (0xd573d5e6a6b733d5), U64_C (0x803a8074ba1df480),
+ U64_C (0xbec2be997c6127be), U64_C (0xcd13cd26de87ebcd),
+ U64_C (0x34d034bde4688934), U64_C (0x483d487a75903248),
+ U64_C (0xffdbffab24e354ff), U64_C (0x7af57af78ff48d7a),
+ U64_C (0x907a90f4ea3d6490), U64_C (0x5f615fc23ebe9d5f),
+ U64_C (0x2080201da0403d20), U64_C (0x68bd6867d5d00f68),
+ U64_C (0x1a681ad07234ca1a), U64_C (0xae82ae192c41b7ae),
+ U64_C (0xb4eab4c95e757db4), U64_C (0x544d549a19a8ce54),
+ U64_C (0x937693ece53b7f93), U64_C (0x2288220daa442f22),
+ U64_C (0x648d6407e9c86364), U64_C (0xf1e3f1db12ff2af1),
+ U64_C (0x73d173bfa2e6cc73), U64_C (0x124812905a248212),
+ U64_C (0x401d403a5d807a40), U64_C (0x0820084028104808),
+ U64_C (0xc32bc356e89b95c3), U64_C (0xec97ec337bc5dfec),
+ U64_C (0xdb4bdb9690ab4ddb), U64_C (0xa1bea1611f5fc0a1),
+ U64_C (0x8d0e8d1c8307918d), U64_C (0x3df43df5c97ac83d),
+ U64_C (0x976697ccf1335b97), U64_C (0x0000000000000000),
+ U64_C (0xcf1bcf36d483f9cf), U64_C (0x2bac2b4587566e2b),
+ U64_C (0x76c57697b3ece176), U64_C (0x82328264b019e682),
+ U64_C (0xd67fd6fea9b128d6), U64_C (0x1b6c1bd87736c31b),
+ U64_C (0xb5eeb5c15b7774b5), U64_C (0xaf86af112943beaf),
+ U64_C (0x6ab56a77dfd41d6a), U64_C (0x505d50ba0da0ea50),
+ U64_C (0x450945124c8a5745), U64_C (0xf3ebf3cb18fb38f3),
+ U64_C (0x30c0309df060ad30), U64_C (0xef9bef2b74c3c4ef),
+ U64_C (0x3ffc3fe5c37eda3f), U64_C (0x554955921caac755),
+ U64_C (0xa2b2a2791059dba2), U64_C (0xea8fea0365c9e9ea),
+ U64_C (0x6589650fecca6a65), U64_C (0xbad2bab9686903ba),
+ U64_C (0x2fbc2f65935e4a2f), U64_C (0xc027c04ee79d8ec0),
+ U64_C (0xde5fdebe81a160de), U64_C (0x1c701ce06c38fc1c),
+ U64_C (0xfdd3fdbb2ee746fd), U64_C (0x4d294d52649a1f4d),
+ U64_C (0x927292e4e0397692), U64_C (0x75c9758fbceafa75),
+ U64_C (0x061806301e0c3606), U64_C (0x8a128a249809ae8a),
+ U64_C (0xb2f2b2f940794bb2), U64_C (0xe6bfe66359d185e6),
+ U64_C (0x0e380e70361c7e0e), U64_C (0x1f7c1ff8633ee71f),
+ U64_C (0x62956237f7c45562), U64_C (0xd477d4eea3b53ad4),
+ U64_C (0xa89aa829324d81a8), U64_C (0x966296c4f4315296),
+ U64_C (0xf9c3f99b3aef62f9), U64_C (0xc533c566f697a3c5),
+ U64_C (0x25942535b14a1025), U64_C (0x597959f220b2ab59),
+ U64_C (0x842a8454ae15d084), U64_C (0x72d572b7a7e4c572),
+ U64_C (0x39e439d5dd72ec39), U64_C (0x4c2d4c5a6198164c),
+ U64_C (0x5e655eca3bbc945e), U64_C (0x78fd78e785f09f78),
+ U64_C (0x38e038ddd870e538), U64_C (0x8c0a8c148605988c),
+ U64_C (0xd163d1c6b2bf17d1), U64_C (0xa5aea5410b57e4a5),
+ U64_C (0xe2afe2434dd9a1e2), U64_C (0x6199612ff8c24e61),
+ U64_C (0xb3f6b3f1457b42b3), U64_C (0x21842115a5423421),
+ U64_C (0x9c4a9c94d625089c), U64_C (0x1e781ef0663cee1e),
+ U64_C (0x4311432252866143), U64_C (0xc73bc776fc93b1c7),
+ U64_C (0xfcd7fcb32be54ffc), U64_C (0x0410042014082404),
+ U64_C (0x515951b208a2e351), U64_C (0x995e99bcc72f2599),
+ U64_C (0x6da96d4fc4da226d), U64_C (0x0d340d68391a650d),
+ U64_C (0xfacffa8335e979fa), U64_C (0xdf5bdfb684a369df),
+ U64_C (0x7ee57ed79bfca97e), U64_C (0x2490243db4481924),
+ U64_C (0x3bec3bc5d776fe3b), U64_C (0xab96ab313d4b9aab),
+ U64_C (0xce1fce3ed181f0ce), U64_C (0x1144118855229911),
+ U64_C (0x8f068f0c8903838f), U64_C (0x4e254e4a6b9c044e),
+ U64_C (0xb7e6b7d1517366b7), U64_C (0xeb8beb0b60cbe0eb),
+ U64_C (0x3cf03cfdcc78c13c), U64_C (0x813e817cbf1ffd81),
+ U64_C (0x946a94d4fe354094), U64_C (0xf7fbf7eb0cf31cf7),
+ U64_C (0xb9deb9a1676f18b9), U64_C (0x134c13985f268b13),
+ U64_C (0x2cb02c7d9c58512c), U64_C (0xd36bd3d6b8bb05d3),
+ U64_C (0xe7bbe76b5cd38ce7), U64_C (0x6ea56e57cbdc396e),
+ U64_C (0xc437c46ef395aac4), U64_C (0x030c03180f061b03),
+ U64_C (0x5645568a13acdc56), U64_C (0x440d441a49885e44),
+ U64_C (0x7fe17fdf9efea07f), U64_C (0xa99ea921374f88a9),
+ U64_C (0x2aa82a4d8254672a), U64_C (0xbbd6bbb16d6b0abb),
+ U64_C (0xc123c146e29f87c1), U64_C (0x535153a202a6f153),
+ U64_C (0xdc57dcae8ba572dc), U64_C (0x0b2c0b582716530b),
+ U64_C (0x9d4e9d9cd327019d), U64_C (0x6cad6c47c1d82b6c),
+ U64_C (0x31c43195f562a431), U64_C (0x74cd7487b9e8f374),
+ U64_C (0xf6fff6e309f115f6), U64_C (0x4605460a438c4c46),
+ U64_C (0xac8aac092645a5ac), U64_C (0x891e893c970fb589),
+ U64_C (0x145014a04428b414), U64_C (0xe1a3e15b42dfbae1),
+ U64_C (0x165816b04e2ca616), U64_C (0x3ae83acdd274f73a),
+ U64_C (0x69b9696fd0d20669), U64_C (0x092409482d124109),
+ U64_C (0x70dd70a7ade0d770), U64_C (0xb6e2b6d954716fb6),
+ U64_C (0xd067d0ceb7bd1ed0), U64_C (0xed93ed3b7ec7d6ed),
+ U64_C (0xcc17cc2edb85e2cc), U64_C (0x4215422a57846842),
+ U64_C (0x985a98b4c22d2c98), U64_C (0xa4aaa4490e55eda4),
+ U64_C (0x28a0285d88507528), U64_C (0x5c6d5cda31b8865c),
+ U64_C (0xf8c7f8933fed6bf8), U64_C (0x86228644a411c286),
+ } }
+};
+#define C tab.C
+#define C0 C[0]
+#define C1 C[1]
+#define C2 C[2]
+#define C3 C[3]
+#define C4 C[4]
+#define C5 C[5]
+#define C6 C[6]
+#define C7 C[7]
+#define rc tab.RC
+
+
+
+static unsigned int
+whirlpool_transform (void *ctx, const unsigned char *data, size_t nblks);
+
+
+
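+/* Initialize the hash context.  When the GCRY_MD_FLAG_BUGEMU1 flag is
+   given, the historic bug-compatible message length handling is
+   enabled (see whirlpool_add_bugemu below).  */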
+static void
+whirlpool_init (void *ctx, unsigned int flags)
+{
+ whirlpool_context_t *context = ctx;
+
+ memset (context, 0, sizeof (*context));
+
+ context->bctx.blocksize_shift = _gcry_ctz(BLOCK_SIZE);
+ context->bctx.bwrite = whirlpool_transform;
+ if ((flags & GCRY_MD_FLAG_BUGEMU1))
+ {
+ memset (&context->bugemu, 0, sizeof context->bugemu);
+ context->use_bugemu = 1;
+ }
+ else
+ context->use_bugemu = 0;
+}
+
+
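+/* On x86-64 builds the block transform is provided by an assembly
+   implementation; otherwise the generic C transform below is used.  */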
+#ifdef USE_AMD64_ASM
+
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
+
+extern unsigned int
+_gcry_whirlpool_transform_amd64(u64 *state, const unsigned char *data,
+ size_t nblks, const struct whirlpool_tables_s *tables) ASM_FUNC_ABI;
+
+static unsigned int
+whirlpool_transform (void *ctx, const unsigned char *data, size_t nblks)
+{
+ whirlpool_context_t *context = ctx;
+
+ return _gcry_whirlpool_transform_amd64(
+ context->hash_state, data, nblks, &tab) + ASM_EXTRA_STACK;
+}
+
+#else /* USE_AMD64_ASM */
+
+/*
+ * Transform block.
+ */
+static unsigned int
+whirlpool_transform_blk (void *ctx, const unsigned char *data)
+{
+ whirlpool_context_t *context = ctx;
+ whirlpool_block_t data_block;
+ whirlpool_block_t key;
+ whirlpool_block_t state;
+ whirlpool_block_t block;
+ unsigned int r;
+ unsigned int i;
+
+ buffer_to_block (data, data_block, i);
+ block_copy (key, context->hash_state, i);
+ block_copy (state, context->hash_state, i);
+ block_xor (state, data_block, i);
+
+ for (r = 0; r < R; r++)
+ {
+ /* Compute round key K^r. */
+
+ block[0] = (C0[(key[0] >> 56) & 0xFF] ^ C1[(key[7] >> 48) & 0xFF] ^
+ C2[(key[6] >> 40) & 0xFF] ^ C3[(key[5] >> 32) & 0xFF] ^
+ C4[(key[4] >> 24) & 0xFF] ^ C5[(key[3] >> 16) & 0xFF] ^
+ C6[(key[2] >> 8) & 0xFF] ^ C7[(key[1] >> 0) & 0xFF] ^ rc[r]);
+ block[1] = (C0[(key[1] >> 56) & 0xFF] ^ C1[(key[0] >> 48) & 0xFF] ^
+ C2[(key[7] >> 40) & 0xFF] ^ C3[(key[6] >> 32) & 0xFF] ^
+ C4[(key[5] >> 24) & 0xFF] ^ C5[(key[4] >> 16) & 0xFF] ^
+ C6[(key[3] >> 8) & 0xFF] ^ C7[(key[2] >> 0) & 0xFF]);
+ block[2] = (C0[(key[2] >> 56) & 0xFF] ^ C1[(key[1] >> 48) & 0xFF] ^
+ C2[(key[0] >> 40) & 0xFF] ^ C3[(key[7] >> 32) & 0xFF] ^
+ C4[(key[6] >> 24) & 0xFF] ^ C5[(key[5] >> 16) & 0xFF] ^
+ C6[(key[4] >> 8) & 0xFF] ^ C7[(key[3] >> 0) & 0xFF]);
+ block[3] = (C0[(key[3] >> 56) & 0xFF] ^ C1[(key[2] >> 48) & 0xFF] ^
+ C2[(key[1] >> 40) & 0xFF] ^ C3[(key[0] >> 32) & 0xFF] ^
+ C4[(key[7] >> 24) & 0xFF] ^ C5[(key[6] >> 16) & 0xFF] ^
+ C6[(key[5] >> 8) & 0xFF] ^ C7[(key[4] >> 0) & 0xFF]);
+ block[4] = (C0[(key[4] >> 56) & 0xFF] ^ C1[(key[3] >> 48) & 0xFF] ^
+ C2[(key[2] >> 40) & 0xFF] ^ C3[(key[1] >> 32) & 0xFF] ^
+ C4[(key[0] >> 24) & 0xFF] ^ C5[(key[7] >> 16) & 0xFF] ^
+ C6[(key[6] >> 8) & 0xFF] ^ C7[(key[5] >> 0) & 0xFF]);
+ block[5] = (C0[(key[5] >> 56) & 0xFF] ^ C1[(key[4] >> 48) & 0xFF] ^
+ C2[(key[3] >> 40) & 0xFF] ^ C3[(key[2] >> 32) & 0xFF] ^
+ C4[(key[1] >> 24) & 0xFF] ^ C5[(key[0] >> 16) & 0xFF] ^
+ C6[(key[7] >> 8) & 0xFF] ^ C7[(key[6] >> 0) & 0xFF]);
+ block[6] = (C0[(key[6] >> 56) & 0xFF] ^ C1[(key[5] >> 48) & 0xFF] ^
+ C2[(key[4] >> 40) & 0xFF] ^ C3[(key[3] >> 32) & 0xFF] ^
+ C4[(key[2] >> 24) & 0xFF] ^ C5[(key[1] >> 16) & 0xFF] ^
+ C6[(key[0] >> 8) & 0xFF] ^ C7[(key[7] >> 0) & 0xFF]);
+ block[7] = (C0[(key[7] >> 56) & 0xFF] ^ C1[(key[6] >> 48) & 0xFF] ^
+ C2[(key[5] >> 40) & 0xFF] ^ C3[(key[4] >> 32) & 0xFF] ^
+ C4[(key[3] >> 24) & 0xFF] ^ C5[(key[2] >> 16) & 0xFF] ^
+ C6[(key[1] >> 8) & 0xFF] ^ C7[(key[0] >> 0) & 0xFF]);
+ block_copy (key, block, i);
+
+ /* Apply r-th round transformation. */
+
+ block[0] = (C0[(state[0] >> 56) & 0xFF] ^ C1[(state[7] >> 48) & 0xFF] ^
+ C2[(state[6] >> 40) & 0xFF] ^ C3[(state[5] >> 32) & 0xFF] ^
+ C4[(state[4] >> 24) & 0xFF] ^ C5[(state[3] >> 16) & 0xFF] ^
+ C6[(state[2] >> 8) & 0xFF] ^ C7[(state[1] >> 0) & 0xFF] ^ key[0]);
+ block[1] = (C0[(state[1] >> 56) & 0xFF] ^ C1[(state[0] >> 48) & 0xFF] ^
+ C2[(state[7] >> 40) & 0xFF] ^ C3[(state[6] >> 32) & 0xFF] ^
+ C4[(state[5] >> 24) & 0xFF] ^ C5[(state[4] >> 16) & 0xFF] ^
+ C6[(state[3] >> 8) & 0xFF] ^ C7[(state[2] >> 0) & 0xFF] ^ key[1]);
+ block[2] = (C0[(state[2] >> 56) & 0xFF] ^ C1[(state[1] >> 48) & 0xFF] ^
+ C2[(state[0] >> 40) & 0xFF] ^ C3[(state[7] >> 32) & 0xFF] ^
+ C4[(state[6] >> 24) & 0xFF] ^ C5[(state[5] >> 16) & 0xFF] ^
+ C6[(state[4] >> 8) & 0xFF] ^ C7[(state[3] >> 0) & 0xFF] ^ key[2]);
+ block[3] = (C0[(state[3] >> 56) & 0xFF] ^ C1[(state[2] >> 48) & 0xFF] ^
+ C2[(state[1] >> 40) & 0xFF] ^ C3[(state[0] >> 32) & 0xFF] ^
+ C4[(state[7] >> 24) & 0xFF] ^ C5[(state[6] >> 16) & 0xFF] ^
+ C6[(state[5] >> 8) & 0xFF] ^ C7[(state[4] >> 0) & 0xFF] ^ key[3]);
+ block[4] = (C0[(state[4] >> 56) & 0xFF] ^ C1[(state[3] >> 48) & 0xFF] ^
+ C2[(state[2] >> 40) & 0xFF] ^ C3[(state[1] >> 32) & 0xFF] ^
+ C4[(state[0] >> 24) & 0xFF] ^ C5[(state[7] >> 16) & 0xFF] ^
+ C6[(state[6] >> 8) & 0xFF] ^ C7[(state[5] >> 0) & 0xFF] ^ key[4]);
+ block[5] = (C0[(state[5] >> 56) & 0xFF] ^ C1[(state[4] >> 48) & 0xFF] ^
+ C2[(state[3] >> 40) & 0xFF] ^ C3[(state[2] >> 32) & 0xFF] ^
+ C4[(state[1] >> 24) & 0xFF] ^ C5[(state[0] >> 16) & 0xFF] ^
+ C6[(state[7] >> 8) & 0xFF] ^ C7[(state[6] >> 0) & 0xFF] ^ key[5]);
+ block[6] = (C0[(state[6] >> 56) & 0xFF] ^ C1[(state[5] >> 48) & 0xFF] ^
+ C2[(state[4] >> 40) & 0xFF] ^ C3[(state[3] >> 32) & 0xFF] ^
+ C4[(state[2] >> 24) & 0xFF] ^ C5[(state[1] >> 16) & 0xFF] ^
+ C6[(state[0] >> 8) & 0xFF] ^ C7[(state[7] >> 0) & 0xFF] ^ key[6]);
+ block[7] = (C0[(state[7] >> 56) & 0xFF] ^ C1[(state[6] >> 48) & 0xFF] ^
+ C2[(state[5] >> 40) & 0xFF] ^ C3[(state[4] >> 32) & 0xFF] ^
+ C4[(state[3] >> 24) & 0xFF] ^ C5[(state[2] >> 16) & 0xFF] ^
+ C6[(state[1] >> 8) & 0xFF] ^ C7[(state[0] >> 0) & 0xFF] ^ key[7]);
+ block_copy (state, block, i);
+ }
+
+ /* Compression. */
+
+ block_xor (context->hash_state, data_block, i);
+ block_xor (context->hash_state, state, i);
+
+ return /*burn_stack*/ 4 * sizeof(whirlpool_block_t) + 2 * sizeof(int) +
+ 4 * sizeof(void*);
+}
+
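+/* Process NBLKS consecutive input blocks with the generic C transform
+   and return the stack burn depth of the last block.  */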
+static unsigned int
+whirlpool_transform ( void *c, const unsigned char *data, size_t nblks )
+{
+ unsigned int burn;
+
+ do
+ {
+ burn = whirlpool_transform_blk (c, data);
+ data += BLOCK_SIZE;
+ }
+ while (--nblks);
+
+ return burn;
+}
+
+#endif /* !USE_AMD64_ASM */
+
+
+/* Bug-compatible Whirlpool update function. */
+static void
+whirlpool_add_bugemu (whirlpool_context_t *context,
+ const void *buffer_arg, size_t buffer_n)
+{
+ const unsigned char *buffer = buffer_arg;
+ u64 buffer_size;
+ unsigned int carry;
+ unsigned int i;
+
+ buffer_size = buffer_n;
+
+ if (context->bugemu.count == BLOCK_SIZE)
+ {
+ /* Flush the buffer. */
+ whirlpool_transform (context, context->bctx.buf, 1);
+ context->bugemu.count = 0;
+ }
+ if (! buffer)
+ return; /* Nothing to add. */
+
+ if (context->bugemu.count)
+ {
+ while (buffer_n && (context->bugemu.count < BLOCK_SIZE))
+ {
+ context->bctx.buf[context->bugemu.count++] = *buffer++;
+ buffer_n--;
+ }
+ whirlpool_add_bugemu (context, NULL, 0);
+ if (!buffer_n)
+ return; /* Done. This is the bug we emulate. */
+ }
+
+ while (buffer_n >= BLOCK_SIZE)
+ {
+ whirlpool_transform (context, buffer, 1);
+ context->bugemu.count = 0;
+ buffer_n -= BLOCK_SIZE;
+ buffer += BLOCK_SIZE;
+ }
+ while (buffer_n && (context->bugemu.count < BLOCK_SIZE))
+ {
+ context->bctx.buf[context->bugemu.count++] = *buffer++;
+ buffer_n--;
+ }
+
+ /* Update bit counter. */
+ carry = 0;
+ buffer_size <<= 3;
+ for (i = 1; i <= 32; i++)
+ {
+ if (! (buffer_size || carry))
+ break;
+
+ carry += context->bugemu.length[32 - i] + (buffer_size & 0xFF);
+ context->bugemu.length[32 - i] = carry;
+ buffer_size >>= 8;
+ carry >>= 8;
+ }
+ gcry_assert (! (buffer_size || carry));
+}
+
+
+/* Bug-compatible Whirlpool finalization function. */
+static void
+whirlpool_final_bugemu (void *ctx)
+{
+ whirlpool_context_t *context = ctx;
+ unsigned int i;
+
+ /* Flush. */
+ whirlpool_add_bugemu (context, NULL, 0);
+
+ /* Pad. */
+ context->bctx.buf[context->bugemu.count++] = 0x80;
+
+ if (context->bugemu.count > 32)
+ {
+ /* An extra block is necessary. */
+ while (context->bugemu.count < 64)
+ context->bctx.buf[context->bugemu.count++] = 0;
+ whirlpool_add_bugemu (context, NULL, 0);
+ }
+ while (context->bugemu.count < 32)
+ context->bctx.buf[context->bugemu.count++] = 0;
+
+ /* Add length of message. */
+ memcpy (context->bctx.buf + context->bugemu.count,
+ context->bugemu.length, 32);
+ context->bugemu.count += 32;
+ whirlpool_add_bugemu (context, NULL, 0);
+
+ block_to_buffer (context->bctx.buf, context->hash_state, i);
+}
+
+
+static void
+whirlpool_write (void *ctx, const void *buffer, size_t buffer_n)
+{
+ whirlpool_context_t *context = ctx;
+
+ if (context->use_bugemu)
+ {
+ whirlpool_add_bugemu (context, buffer, buffer_n);
+ }
+ else
+ {
+ u64 old_nblocks = context->bctx.nblocks;
+
+ _gcry_md_block_write (context, buffer, buffer_n);
+
+ gcry_assert (old_nblocks <= context->bctx.nblocks);
+ }
+}
+
+static void
+whirlpool_final (void *ctx)
+{
+ whirlpool_context_t *context = ctx;
+ unsigned int i;
+ u64 t, th, lsb, msb;
+ unsigned char *length;
+
+ if (context->use_bugemu)
+ {
+ whirlpool_final_bugemu (ctx);
+ return;
+ }
+
+ t = context->bctx.nblocks;
+ /* if (sizeof t == sizeof context->bctx.nblocks) */
+ th = context->bctx.nblocks_high;
+ /* else */
+ /* th = context->bctx.nblocks >> 64; In case we ever use u128 */
+
+ /* multiply by 64 to make a byte count */
+ lsb = t << 6;
+ msb = (th << 6) | (t >> 58);
+ /* add the count */
+ t = lsb;
+ if ((lsb += context->bctx.count) < t)
+ msb++;
+ /* multiply by 8 to make a bit count */
+ t = lsb;
+ lsb <<= 3;
+ msb <<= 3;
+ msb |= t >> 61;
+
+ /* Flush. */
+ whirlpool_write (context, NULL, 0);
+
+ /* Pad. */
+ context->bctx.buf[context->bctx.count++] = 0x80;
+
+ if (context->bctx.count > 32)
+ {
+ /* An extra block is necessary. */
+ if (context->bctx.count < 64)
+ memset (&context->bctx.buf[context->bctx.count], 0,
+ 64 - context->bctx.count);
+ context->bctx.count = 64;
+ whirlpool_write (context, NULL, 0);
+ }
+ if (context->bctx.count < 32)
+ memset (&context->bctx.buf[context->bctx.count], 0,
+ 32 - context->bctx.count);
+ context->bctx.count = 32;
+
+ /* Add length of message. */
+ length = context->bctx.buf + context->bctx.count;
+ buf_put_be64(&length[0 * 8], 0);
+ buf_put_be64(&length[1 * 8], 0);
+ buf_put_be64(&length[2 * 8], msb);
+ buf_put_be64(&length[3 * 8], lsb);
+ context->bctx.count += 32;
+ whirlpool_write (context, NULL, 0);
+
+ block_to_buffer (context->bctx.buf, context->hash_state, i);
+}
+
+static byte *
+whirlpool_read (void *ctx)
+{
+ whirlpool_context_t *context = ctx;
+
+ return context->bctx.buf;
+}
+
+gcry_md_spec_t _gcry_digest_spec_whirlpool =
+ {
+ GCRY_MD_WHIRLPOOL, {0, 0},
+ "WHIRLPOOL", NULL, 0, NULL, 64,
+ whirlpool_init, whirlpool_write, whirlpool_final, whirlpool_read, NULL,
+ NULL, NULL,
+ sizeof (whirlpool_context_t)
+ };